hai 4 meses · 9814370659
--- a/base/intrinsics/intrinsics.odin
+++ b/base/intrinsics/intrinsics.odin
@@ -298,7 +298,7 @@ simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)
 
															 simd_masked_expand_load    :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
														
 
															 simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)              where type_is_integer(U) || type_is_boolean(U) ---
														
 
															-
														
 
															+simd_indices :: proc($T: typeid/#simd[$N]$E) -> T where type_is_numeric(T) ---
														
 
															 simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
														
 
															 simd_select  :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
														
--- a/core/bufio/reader.odin
+++ b/core/bufio/reader.odin
@@ -257,7 +257,7 @@ reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
 
															 	for b.r+utf8.UTF_MAX > b.w &&
														
 
															 	    !utf8.full_rune(b.buf[b.r:b.w]) &&
														
 
															 	    b.err == nil &&
														
 
															-	    b.w-b.w < len(b.buf) {
														
 
															+	    b.w-b.r < len(b.buf) {
														
 
															 		_reader_read_new_chunk(b) or_return
														
 
															 	}
														
--- a/core/os/os2/file_windows.odin
+++ b/core/os/os2/file_windows.odin
@@ -12,7 +12,30 @@ import win32 "core:sys/windows"
 
															 INVALID_HANDLE :: ~uintptr(0)
														
 
															-S_IWRITE :: 0o200
														
 
															+// NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`.
														
 
															+//               Let's not do so for Windows for `chmod` or `read_directory_iterator` either.
														
 
															+//               They're *not* portable between Windows and non-Windows platforms.
														
 
															+//
														
 
															+//               It also leads to information loss as flags like Archive, Hidden and System have no equivalent there.
														
 
															+//               We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend
														
 
															+//               that 0o644 is meaningful when returned as a mode.
														
 
															+//               `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444.
														
 
															+FILE_ATTRIBUTE_READONLY            :: win32.FILE_ATTRIBUTE_READONLY            // 0x00000001
														
 
															+FILE_ATTRIBUTE_HIDDEN              :: win32.FILE_ATTRIBUTE_HIDDEN              // 0x00000002
														
 
															+FILE_ATTRIBUTE_SYSTEM              :: win32.FILE_ATTRIBUTE_SYSTEM              // 0x00000004
														
 
															+FILE_ATTRIBUTE_DIRECTORY           :: win32.FILE_ATTRIBUTE_DIRECTORY           // 0x00000010
														
 
															+FILE_ATTRIBUTE_ARCHIVE             :: win32.FILE_ATTRIBUTE_ARCHIVE             // 0x00000020
														
 
															+FILE_ATTRIBUTE_DEVICE              :: win32.FILE_ATTRIBUTE_DEVICE              // 0x00000040
														
 
															+FILE_ATTRIBUTE_NORMAL              :: win32.FILE_ATTRIBUTE_NORMAL              // 0x00000080 (same bit value as the removed S_IWRITE, 0o200)
														
 
															+FILE_ATTRIBUTE_TEMPORARY           :: win32.FILE_ATTRIBUTE_TEMPORARY           // 0x00000100
														
 
															+FILE_ATTRIBUTE_SPARSE_FILE         :: win32.FILE_ATTRIBUTE_SPARSE_FILE         // 0x00000200
														
 
															+FILE_ATTRIBUTE_REPARSE_Point       :: win32.FILE_ATTRIBUTE_REPARSE_Point       // 0x00000400 (same value as FILE_ATTRIBUTE_REPARSE_POINT; NOTE(review): mixed-case spelling looks like an alias, confirm it is intentional)
														
 
															+FILE_ATTRIBUTE_REPARSE_POINT       :: win32.FILE_ATTRIBUTE_REPARSE_POINT       // 0x00000400
														
 
															+FILE_ATTRIBUTE_COMPRESSED          :: win32.FILE_ATTRIBUTE_COMPRESSED          // 0x00000800
														
 
															+FILE_ATTRIBUTE_OFFLINE             :: win32.FILE_ATTRIBUTE_OFFLINE             // 0x00001000
														
 
															+FILE_ATTRIBUTE_NOT_CONTENT_INDEXED :: win32.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED // 0x00002000
														
 
															+FILE_ATTRIBUTE_ENCRYPTED           :: win32.FILE_ATTRIBUTE_ENCRYPTED           // 0x00004000
														
 
															+
														
 
															 _ERROR_BAD_NETPATH :: 53
														
 
															 MAX_RW :: 1<<30
														
@@ -122,7 +145,7 @@ _open_internal :: proc(name: string, flags: File_Flags, perm: int) -> (handle: u
 
															 	}
														
 
															 	attrs: u32 = win32.FILE_ATTRIBUTE_NORMAL|win32.FILE_FLAG_BACKUP_SEMANTICS
														
 
															-	if perm & S_IWRITE == 0 {
														
 
															+	if u32(perm) & FILE_ATTRIBUTE_NORMAL == 0 {
														
 
															 		attrs = win32.FILE_ATTRIBUTE_READONLY
														
 
															 		if create_mode == win32.CREATE_ALWAYS {
														
 
															 			// NOTE(bill): Open has just asked to create a file in read-only mode.
														
@@ -748,20 +771,10 @@ _fchmod :: proc(f: ^File, mode: int) -> Error {
 
															 	if f == nil || f.impl == nil {
														
 
															 		return nil
														
 
															 	}
														
 
															-	d: win32.BY_HANDLE_FILE_INFORMATION
														
 
															-	if !win32.GetFileInformationByHandle(_handle(f), &d) {
														
 
															-		return _get_platform_error()
														
 
															-	}
														
 
															-	attrs := d.dwFileAttributes
														
 
															-	if mode & S_IWRITE != 0 {
														
 
															-		attrs &~= win32.FILE_ATTRIBUTE_READONLY
														
 
															-	} else {
														
 
															-		attrs |= win32.FILE_ATTRIBUTE_READONLY
														
 
															-	}
														
 
															 	info: win32.FILE_BASIC_INFO
														
 
															-	info.FileAttributes = attrs
														
 
															-	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) {
														
 
															+	info.FileAttributes = win32.DWORD(mode)
														
 
															+	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) {
														
 
															 		return _get_platform_error()
														
 
															 	}
														
 
															 	return nil
														
@@ -800,19 +813,11 @@ _chtimes :: proc(name: string, atime, mtime: time.Time) -> Error {
 
															 	defer close(f)
														
 
															 	return _fchtimes(f, atime, mtime)
														
 
															 }
														
 
															+
														
 
															 _fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error {
														
 
															 	if f == nil || f.impl == nil {
														
 
															 		return nil
														
 
															 	}
														
 
															-	d: win32.BY_HANDLE_FILE_INFORMATION
														
 
															-	if !win32.GetFileInformationByHandle(_handle(f), &d) {
														
 
															-		return _get_platform_error()
														
 
															-	}
														
 
															-
														
 
															-	to_windows_time :: #force_inline proc(t: time.Time) -> win32.LARGE_INTEGER {
														
 
															-		// a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)
														
 
															-		return win32.LARGE_INTEGER(time.time_to_unix_nano(t) * 100 + 116444736000000000)
														
 
															-	}
														
 
															 	atime, mtime := atime, mtime
														
 
															 	if time.time_to_unix_nano(atime) < time.time_to_unix_nano(mtime) {
														
@@ -820,9 +825,9 @@ _fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error {
 
															 	}
														
 
															 	info: win32.FILE_BASIC_INFO
														
 
															-	info.LastAccessTime = to_windows_time(atime)
														
 
															-	info.LastWriteTime  = to_windows_time(mtime)
														
 
															-	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) {
														
 
															+	info.LastAccessTime = time_as_filetime(atime)
														
 
															+	info.LastWriteTime  = time_as_filetime(mtime)
														
 
															+	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) {
														
 
															 		return _get_platform_error()
														
 
															 	}
														
 
															 	return nil
														
--- a/core/os/os2/process_linux.odin
+++ b/core/os/os2/process_linux.odin
@@ -162,7 +162,7 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
															 		}
														
 
															 	}
														
 
															-	cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args, .Executable_Path} != {} {
														
 
															+	cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args} != {} {
														
 
															 		strings.builder_reset(&path_builder)
														
 
															 		strings.write_string(&path_builder, "/proc/")
														
 
															 		strings.write_int(&path_builder, pid)
														
@@ -178,12 +178,12 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
															 		terminator := strings.index_byte(cmdline, 0)
														
 
															 		assert(terminator > 0)
														
 
															-		command_line_exec := cmdline[:terminator]
														
 
															+		// command_line_exec := cmdline[:terminator]
														
 
															 		// Still need cwd if the execution on the command line is relative.
														
 
															 		cwd: string
														
 
															 		cwd_err: Error
														
 
															-		if .Working_Dir in selection || (.Executable_Path in selection && command_line_exec[0] != '/') {
														
 
															+		if .Working_Dir in selection {
														
 
															 			strings.builder_reset(&path_builder)
														
 
															 			strings.write_string(&path_builder, "/proc/")
														
 
															 			strings.write_int(&path_builder, pid)
														
@@ -199,18 +199,6 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
															 			}
														
 
															 		}
														
 
															-		if .Executable_Path in selection {
														
 
															-			if cmdline[0] == '/' {
														
 
															-				info.executable_path = strings.clone(cmdline[:terminator], allocator) or_return
														
 
															-				info.fields += {.Executable_Path}
														
 
															-			} else if cwd_err == nil {
														
 
															-				info.executable_path = join_path({ cwd, cmdline[:terminator] }, allocator) or_return
														
 
															-				info.fields += {.Executable_Path}
														
 
															-			} else {
														
 
															-				break cmdline_if
														
 
															-			}
														
 
															-		}
														
 
															-
														
 
															 		if selection & {.Command_Line, .Command_Args} != {} {
														
 
															 			// skip to first arg
														
 
															 			//cmdline = cmdline[terminator + 1:]
														
@@ -323,6 +311,30 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
															 		}
														
 
															 	}
														
 
															+	if .Executable_Path in selection {
														
 
															+		/*
														
 
															+		NOTE(Jeroen):
														
 
															+
														
 
															+		The old version returned the wrong executable path for things like `bash` or `sh`,
														
 
															+		for which `/proc/<pid>/cmdline` will just report "bash" or "sh",
														
 
															+		resulting in misleading paths like `$PWD/sh`, even though that executable doesn't exist there.
														
 
															+
														
 
															+		Thanks to Yawning for suggesting `/proc/self/exe`; we read `/proc/<pid>/exe` so it works for any process.
														
 
															+		*/
														
 
															+
														
 
															+		strings.builder_reset(&path_builder)
														
 
															+		strings.write_string(&path_builder, "/proc/")
														
 
															+		strings.write_int(&path_builder, pid)
														
 
															+		strings.write_string(&path_builder, "/exe")
														
 
															+
														
 
															+		if exe_bytes, exe_err := _read_link(strings.to_string(path_builder), temp_allocator()); exe_err == nil {
														
 
															+			info.executable_path = strings.clone(string(exe_bytes), allocator) or_return
														
 
															+			info.fields += {.Executable_Path}
														
 
															+		} else {
														
 
															+			err = exe_err
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															 	if .Environment in selection {
														
 
															 		strings.builder_reset(&path_builder)
														
 
															 		strings.write_string(&path_builder, "/proc/")
														
--- a/core/os/os2/stat_windows.odin
+++ b/core/os/os2/stat_windows.odin
@@ -212,11 +212,15 @@ _file_type_from_create_file :: proc(wname: win32.wstring, create_file_attributes
 
															 }
														
 
															 _file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: win32.HANDLE, ReparseTag: win32.DWORD) -> (type: File_Type, mode: int) {
														
 
															-	if file_attributes & win32.FILE_ATTRIBUTE_READONLY != 0 {
														
 
															-		mode |= 0o444
														
 
															-	} else {
														
 
															-		mode |= 0o666
														
 
															-	}
														
 
															+	// NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`.
														
 
															+	//               Let's not do so for Windows for `chmod` or `read_directory_iterator` either.
														
 
															+	//               They're *not* portable between Windows and non-Windows platforms.
														
 
															+	//
														
 
															+	//               It also leads to information loss as flags like Archive, Hidden and System have no equivalent there.
														
 
															+	//               We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend
														
 
															+	//               that 0o644 is meaningful when returned as a mode.
														
 
															+	//               `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444.
														
 
															+	mode = int(file_attributes)
														
 
															 	is_sym := false
														
 
															 	if file_attributes & win32.FILE_ATTRIBUTE_REPARSE_POINT == 0 {
														
@@ -229,21 +233,36 @@ _file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: wi
 
															 		type = .Symlink
														
 
															 	} else if file_attributes & win32.FILE_ATTRIBUTE_DIRECTORY != 0 {
														
 
															 		type = .Directory
														
 
															-		mode |= 0o111
														
 
															 	} else if h != nil {
														
 
															 		type = file_type(h)
														
 
															 	}
														
 
															 	return
														
 
															 }
														
 
															+// Converts `t` to a Windows file time: a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)
														
 
															+time_as_filetime :: #force_inline proc(t: time.Time) -> (ft: win32.LARGE_INTEGER) {
														
 
															+	win := u64(t._nsec / 100) + 116444736000000000 // `_nsec` is presumably ns since the Unix epoch: scale to 100 ns ticks, then add the 1601-01-01 -> 1970-01-01 offset in ticks
														
 
															+	return win32.LARGE_INTEGER(win)
														
 
															+}
														
 
															+
														
 
															+filetime_as_time_li :: #force_inline proc(ft: win32.LARGE_INTEGER) -> (t: time.Time) { // inverse of `time_as_filetime` for a 64-bit tick count
														
 
															+	return {_nsec=(i64(ft) - 116444736000000000) * 100} // remove the 1601-01-01 -> 1970-01-01 offset (in 100 ns ticks), then scale ticks to nanoseconds
														
 
															+}
														
 
															+
														
 
															+filetime_as_time_ft :: #force_inline proc(ft: win32.FILETIME) -> (t: time.Time) { // same conversion, starting from the split low/high FILETIME struct
														
 
															+	return filetime_as_time_li(win32.LARGE_INTEGER(ft.dwLowDateTime) + win32.LARGE_INTEGER(ft.dwHighDateTime) << 32) // `<<` binds tighter than `+`, so this is low + (high << 32)
														
 
															+}
														
 
															+
														
 
															+filetime_as_time :: proc{filetime_as_time_ft, filetime_as_time_li} // overload set: accepts either a win32.FILETIME or a win32.LARGE_INTEGER
														
 
															+
														
 
															 _file_info_from_win32_file_attribute_data :: proc(d: ^win32.WIN32_FILE_ATTRIBUTE_DATA, name: string, allocator: runtime.Allocator) -> (fi: File_Info, e: Error) {
														
 
															 	fi.size = i64(d.nFileSizeHigh)<<32 + i64(d.nFileSizeLow)
														
 
															 	type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0)
														
 
															 	fi.type = type
														
 
															 	fi.mode |= mode
														
 
															-	fi.creation_time     = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
														
 
															-	fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
														
 
															-	fi.access_time       = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
														
 
															+	fi.creation_time     = filetime_as_time(d.ftCreationTime)
														
 
															+	fi.modification_time = filetime_as_time(d.ftLastWriteTime)
														
 
															+	fi.access_time       = filetime_as_time(d.ftLastAccessTime)
														
 
															 	fi.fullpath, e = full_path_from_name(name, allocator)
														
 
															 	fi.name = basename(fi.fullpath)
														
 
															 	return
														
@@ -254,9 +273,9 @@ _file_info_from_win32_find_data :: proc(d: ^win32.WIN32_FIND_DATAW, name: string
 
															 	type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0)
														
 
															 	fi.type = type
														
 
															 	fi.mode |= mode
														
 
															-	fi.creation_time     = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
														
 
															-	fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
														
 
															-	fi.access_time       = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
														
 
															+	fi.creation_time     = filetime_as_time(d.ftCreationTime)
														
 
															+	fi.modification_time = filetime_as_time(d.ftLastWriteTime)
														
 
															+	fi.access_time       = filetime_as_time(d.ftLastAccessTime)
														
 
															 	fi.fullpath, e = full_path_from_name(name, allocator)
														
 
															 	fi.name = basename(fi.fullpath)
														
 
															 	return
														
@@ -286,9 +305,9 @@ _file_info_from_get_file_information_by_handle :: proc(path: string, h: win32.HA
 
															 	type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, h, 0)
														
 
															 	fi.type = type
														
 
															 	fi.mode |= mode
														
 
															-	fi.creation_time     = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
														
 
															-	fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
														
 
															-	fi.access_time       = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
														
 
															+	fi.creation_time     = filetime_as_time(d.ftCreationTime)
														
 
															+	fi.modification_time = filetime_as_time(d.ftLastWriteTime)
														
 
															+	fi.access_time       = filetime_as_time(d.ftLastAccessTime)
														
 
															 	return fi, nil
														
 
															 }
														
--- a/core/simd/simd.odin
+++ b/core/simd/simd.odin
@@ -1759,7 +1759,7 @@ Returns:
 
															 replace :: intrinsics.simd_replace
														
 
															 /*
														
 
															-Reduce a vector to a scalar by adding up all the lanes in an ordered fashion.
														
 
															+Reduce a vector to a scalar by adding up all the lanes.
														
 
															 This procedure returns a scalar that is the ordered sum of all lanes. The
														
 
															 ordered sum may be important for accounting for precision errors in
														
@@ -2511,460 +2511,16 @@ recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where int
 
															 	return T(1) / v
														
 
															 }
														
 
															+
														
 
															 /*
														
 
															 Create a vector where each lane contains the index of that lane.
														
 
															-
														
 
															 Inputs:
														
 
															 - `V`: The type of the vector to create.
														
 
															-
														
 
															 Result:
														
 
															 - A vector of the given type, where each lane contains the index of that lane.
														
 
															-
														
 
															 **Operation**:
														
 
															-
														
 
															 	for i in 0 ..< N {
														
 
															 		res[i] = i
														
 
															 	}
														
 
															 */
														
 
															-indices :: #force_inline proc "contextless" ($V: typeid/#simd[$N]$E) -> V where intrinsics.type_is_numeric(E) {
														
 
															-	when N == 1 {
														
 
															-		return {0}
														
 
															-	} else when N == 2 {
														
 
															-		return {0, 1}
														
 
															-	} else when N == 4 {
														
 
															-		return {0, 1, 2, 3}
														
 
															-	} else when N == 8 {
														
 
															-		return {0, 1, 2, 3, 4, 5, 6, 7}
														
 
															-	} else when N == 16 {
														
 
															-		return {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
														
 
															-	} else when N == 32 {
														
 
															-		return {
														
 
															-			0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
														
 
															-			16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
														
 
															-		}
														
 
															-	} else when N == 64 {
														
 
															-		return {
														
 
															-			0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
														
 
															-			16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
														
 
															-			32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
														
 
															-			48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
														
 
															-		}
														
 
															-	} else {
														
 
															-		#panic("Unsupported vector size!")
														
 
															-	}
														
 
															-}
														
 
															-
														
 
															-/*
														
 
															-Reduce a vector to a scalar by adding up all the lanes in a pairwise fashion.
														
 
															-
														
 
															-This procedure returns a scalar that is the sum of all lanes, calculated by
														
 
															-adding each even-indexed element with the following odd-indexed element to
														
 
															-produce N/2 values. This is repeated until only a single element remains. This
														
 
															-order is supported by hardware instructions for some types/architectures (e.g.
														
 
															-i16/i32/f32/f64 on x86 SSE, i8/i16/i32/f32 on ARM NEON).
														
 
															-
														
 
															-The order of the sum may be important for accounting for precision errors in
														
 
															-floating-point computation, as floating-point addition is not associative, that
														
 
															-is `(a+b)+c` may not be equal to `a+(b+c)`.
														
 
															-
														
 
															-Inputs:
														
 
															-- `v`: The vector to reduce.
														
 
															-
														
 
															-Result:
														
 
															-- Sum of all lanes, as a scalar.
														
 
															-
														
 
															-**Operation**:
														
 
															-
														
 
															-	for n > 1 {
														
 
															-		n = n / 2
														
 
															-		for i in 0 ..< n {
														
 
															-			a[i] = a[2*i+0] + a[2*i+1]
														
 
															-		}
														
 
															-	}
														
 
															-	res := a[0]
														
 
															-
														
 
															-Graphical representation of the operation for N=4:
														
 
															-
														
 
															-	   +-----------------------+
														
 
															-	v: | v0  | v1  | v2  | v3  |
														
 
															-	   +-----------------------+
														
 
															-	      |     |     |     |
														
 
															-	      `>[+]<'     `>[+]<'
														
 
															-	         |           |
														
 
															-	         `--->[+]<--'
														
 
															-	               |
														
 
															-	               v
														
 
															-	            +-----+
														
 
															-	    result: | y0  |
														
 
															-	            +-----+
														
 
															-*/
														
 
															-reduce_add_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
														
 
															-	where intrinsics.type_is_numeric(E) {
														
 
															-	when N == 64 { v64 := v }
														
 
															-	when N == 32 { v32 := v }
														
 
															-	when N == 16 { v16 := v }
														
 
															-	when N == 8  { v8 := v }
														
 
															-	when N == 4  { v4 := v }
														
 
															-	when N == 2  { v2 := v }
														
 
															-
														
 
															-	when N >= 64 {
														
 
															-		x32 := swizzle(v64,
														
 
															-			0,  2,  4,  6,  8,  10, 12, 14,
														
 
															-			16, 18, 20, 22, 24, 26, 28, 30,
														
 
															-			32, 34, 36, 38, 40, 42, 44, 46,
														
 
															-			48, 50, 52, 54, 56, 58, 60, 62)
														
 
															-		y32 := swizzle(v64,
														
 
															-			1,  3,  5,  7,  9,  11, 13, 15,
														
 
															-			17, 19, 21, 23, 25, 27, 29, 31,
														
 
															-			33, 35, 37, 39, 41, 43, 45, 47,
														
 
															-			49, 51, 53, 55, 57, 59, 61, 63)
														
 
															-		v32 := x32 + y32
														
 
															-	}
														
 
															-
														
 
															-	when N >= 32 {
														
 
															-		x16 := swizzle(v32,
														
 
															-			0,  2,  4,  6,  8,  10, 12, 14,
														
 
															-			16, 18, 20, 22, 24, 26, 28, 30)
														
 
															-		y16 := swizzle(v32,
														
 
															-			1,  3,  5,  7,  9,  11, 13, 15,
														
 
															-			17, 19, 21, 23, 25, 27, 29, 31)
														
 
															-		v16 := x16 + y16
														
 
															-	}
														
 
															-
														
 
															-	when N >= 16 {
														
 
															-		x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14)
														
 
															-		y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15)
														
 
															-		v8 := x8 + y8
														
 
															-	}
														
 
															-
														
 
															-	when N >= 8 {
														
 
															-		x4 := swizzle(v8, 0, 2, 4, 6)
														
 
															-		y4 := swizzle(v8, 1, 3, 5, 7)
														
 
															-		v4 := x4 + y4
														
 
															-	}
														
 
															-
														
 
															-	when N >= 4 {
														
 
															-		x2 := swizzle(v4, 0, 2)
														
 
															-		y2 := swizzle(v4, 1, 3)
														
 
															-		v2 := x2 + y2
														
 
															-	}
														
 
															-
														
 
															-	when N >= 2 {
														
 
															-		return extract(v2, 0) + extract(v2, 1)
														
 
															-	} else {
														
 
															-		return extract(v, 0)
														
 
															-	}
														
 
															-}
														
 
															-
														
 
															-/*
														
 
															-Reduce a vector to a scalar by adding up all the lanes in a bisecting fashion.
														
 
															-
														
 
															-This procedure returns a scalar that is the sum of all lanes, calculated by
														
 
															-bisecting the vector into two parts, where the first contains lanes [0, N/2)
														
 
															-and the second contains lanes [N/2, N), and adding the two halves element-wise
														
 
															-to produce N/2 values. This is repeated until only a single element remains.
														
 
															-This order may be faster to compute than the ordered sum for floats, as it can
														
 
															-often be better parallelized.
														
 
															-
														
 
															-The order of the sum may be important for accounting for precision errors in
														
 
															-floating-point computation, as floating-point addition is not associative, that
														
 
															-is `(a+b)+c` may not be equal to `a+(b+c)`.
														
 
															-
														
 
															-Inputs:
														
 
															-- `v`: The vector to reduce.
														
 
															-
														
 
															-Result:
														
 
															-- Sum of all lanes, as a scalar.
														
 
															-
														
 
															-**Operation**:
														
 
															-
														
 
															-	for n > 1 {
														
 
															-		n = n / 2
														
 
															-		for i in 0 ..< n {
														
 
															-			a[i] += a[i+n]
														
 
															-		}
														
 
															-	}
														
 
															-	res := a[0]
														
 
															-
														
 
															-Graphical representation of the operation for N=4:
														
 
															-
														
 
															-	     +-----------------------+
														
 
															-	     | v0  | v1  | v2  | v3  |
														
 
															-	     +-----------------------+
														
 
															-	        |     |     |     |
														
 
															-	       [+]<-- | ---'      |
														
 
															-	        |    [+]<--------'
														
 
															-	        |     |
														
 
															-	        `>[+]<'
														
 
															-	           |
														
 
															-	           v
														
 
															-	        +-----+
														
 
															-	result: | y0  |
														
 
															-	        +-----+
														
 
															-*/
														
 
															-reduce_add_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
														
 
															-	where intrinsics.type_is_numeric(E) {
														
 
															-	when N == 64 { v64 := v }
														
 
															-	when N == 32 { v32 := v }
														
 
															-	when N == 16 { v16 := v }
														
 
															-	when N == 8  { v8 := v }
														
 
															-	when N == 4  { v4 := v }
														
 
															-	when N == 2  { v2 := v }
														
 
															-
														
 
															-	when N >= 64 {
														
 
															-		x32 := swizzle(v64,
														
 
															-			0,  1,  2,  3,  4,  5,  6,  7,
														
 
															-			8,  9,  10, 11, 12, 13, 14, 15,
														
 
															-			16, 17, 18, 19, 20, 21, 22, 23,
														
 
															-			24, 25, 26, 27, 28, 29, 30, 31)
														
 
															-		y32 := swizzle(v64,
														
 
															-			32, 33, 34, 35, 36, 37, 38, 39,
														
 
															-			40, 41, 42, 43, 44, 45, 46, 47,
														
 
															-			48, 49, 50, 51, 52, 53, 54, 55,
														
 
															-			56, 57, 58, 59, 60, 61, 62, 63)
														
 
															-		v32 := x32 + y32
														
 
															-	}
														
 
															-
														
 
															-	when N >= 32 {
														
 
															-		x16 := swizzle(v32,
														
 
															-			0,  1,  2,  3,  4,  5,  6,  7,
														
 
															-			8,  9,  10, 11, 12, 13, 14, 15)
														
 
															-		y16 := swizzle(v32,
														
 
															-			16, 17, 18, 19, 20, 21, 22, 23,
														
 
															-			24, 25, 26, 27, 28, 29, 30, 31)
														
 
															-		v16 := x16 + y16
														
 
															-	}
														
 
															-
														
 
															-	when N >= 16 {
														
 
															-		x8 := swizzle(v16, 0, 1, 2,  3,  4,  5,  6,  7)
														
 
															-		y8 := swizzle(v16, 8, 9, 10, 11, 12, 13, 14, 15)
														
 
															-		v8 := x8 + y8
														
 
															-	}
														
 
															-
														
 
															-	when N >= 8 {
														
 
															-		x4 := swizzle(v8, 0, 1, 2, 3)
														
 
															-		y4 := swizzle(v8, 4, 5, 6, 7)
														
 
															-		v4 := x4 + y4
														
 
															-	}
														
 
															-
														
 
															-	when N >= 4 {
														
 
															-		x2 := swizzle(v4, 0, 1)
														
 
															-		y2 := swizzle(v4, 2, 3)
														
 
															-		v2 := x2 + y2
														
 
															-	}
														
 
															-
														
 
															-	when N >= 2 {
														
 
															-		return extract(v2, 0) + extract(v2, 1)
														
 
															-	} else {
														
 
															-		return extract(v, 0)
														
 
															-	}
														
 
															-}
														
 
															-
														
 
															-/*
														
 
															-Reduce a vector to a scalar by multiplying all the lanes in a pairwise fashion.
														
 
															-
														
 
															-This procedure returns a scalar that is the product of all lanes, calculated by
														
 
															-bisecting the vector into two parts, where the first contains lanes [0, N/2)
														
 
															-and the second contains lanes [N/2, N), and multiplying the two halves together
														
 
															-multiplying each even-indexed element with the following odd-indexed element to
														
 
															-produce N/2 values. This is repeated until only a single element remains. This
														
 
															-order may be faster to compute than the ordered product for floats, as it can
														
 
															-often be better parallelized.
														
 
															-
														
 
															-The order of the product may be important for accounting for precision errors
														
 
															-in floating-point computation, as floating-point multiplication is not
														
 
															-associative, that is `(a*b)*c` may not be equal to `a*(b*c)`.
														
 
															-
														
 
															-Inputs:
														
 
															-- `v`: The vector to reduce.
														
 
															-
														
 
															-Result:
														
 
															-- Product of all lanes, as a scalar.
														
 
															-
														
 
															-**Operation**:
														
 
															-
														
 
															-	for n > 1 {
														
 
															-		n = n / 2
														
 
															-		for i in 0 ..< n {
														
 
															-			a[i] = a[2*i+0] * a[2*i+1]
														
 
															-		}
														
 
															-	}
														
 
															-	res := a[0]
														
 
															-
														
 
															-Graphical representation of the operation for N=4:
														
 
															-
														
 
															-	   +-----------------------+
														
 
															-	v: | v0  | v1  | v2  | v3  |
														
 
															-	   +-----------------------+
														
 
															-	      |     |     |     |
														
 
															-	      `>[x]<'     `>[x]<'
														
 
															-	         |           |
														
 
															-	         `--->[x]<--'
														
 
															-	               |
														
 
															-	               v
														
 
															-	            +-----+
														
 
															-	    result: | y0  |
														
 
															-	            +-----+
														
 
															-*/
														
 
															-reduce_mul_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
														
 
															-	where intrinsics.type_is_numeric(E) {
														
 
															-	when N == 64 { v64 := v }
														
 
															-	when N == 32 { v32 := v }
														
 
															-	when N == 16 { v16 := v }
														
 
															-	when N == 8  { v8 := v }
														
 
															-	when N == 4  { v4 := v }
														
 
															-	when N == 2  { v2 := v }
														
 
															-
														
 
															-	when N >= 64 {
														
 
															-		x32 := swizzle(v64,
														
 
															-			0,  2,  4,  6,  8,  10, 12, 14,
														
 
															-			16, 18, 20, 22, 24, 26, 28, 30,
														
 
															-			32, 34, 36, 38, 40, 42, 44, 46,
														
 
															-			48, 50, 52, 54, 56, 58, 60, 62)
														
 
															-		y32 := swizzle(v64,
														
 
															-			1,  3,  5,  7,  9,  11, 13, 15,
														
 
															-			17, 19, 21, 23, 25, 27, 29, 31,
														
 
															-			33, 35, 37, 39, 41, 43, 45, 47,
														
 
															-			49, 51, 53, 55, 57, 59, 61, 63)
														
 
															-		v32 := x32 * y32
														
 
															-	}
														
 
															-
														
 
															-	when N >= 32 {
														
 
															-		x16 := swizzle(v32,
														
 
															-			0,  2,  4,  6,  8,  10, 12, 14,
														
 
															-			16, 18, 20, 22, 24, 26, 28, 30)
														
 
															-		y16 := swizzle(v32,
														
 
															-			1,  3,  5,  7,  9,  11, 13, 15,
														
 
															-			17, 19, 21, 23, 25, 27, 29, 31)
														
 
															-		v16 := x16 * y16
														
 
															-	}
														
 
															-
														
 
															-	when N >= 16 {
														
 
															-		x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14)
														
 
															-		y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15)
														
 
															-		v8 := x8 * y8
														
 
															-	}
														
 
															-
														
 
															-	when N >= 8 {
														
 
															-		x4 := swizzle(v8, 0, 2, 4, 6)
														
 
															-		y4 := swizzle(v8, 1, 3, 5, 7)
														
 
															-		v4 := x4 * y4
														
 
															-	}
														
 
															-
														
 
															-	when N >= 4 {
														
 
															-		x2 := swizzle(v4, 0, 2)
														
 
															-		y2 := swizzle(v4, 1, 3)
														
 
															-		v2 := x2 * y2
														
 
															-	}
														
 
															-
														
 
															-	when N >= 2 {
														
 
															-		return extract(v2, 0) * extract(v2, 1)
														
 
															-	} else {
														
 
															-		return extract(v, 0)
														
 
															-	}
														
 
															-}
														
 
															-
														
 
															-/*
														
 
															-Reduce a vector to a scalar by multiplying up all the lanes in a bisecting fashion.
														
 
															-
														
 
															-This procedure returns a scalar that is the product of all lanes, calculated by
														
 
															-bisecting the vector into two parts, where the first contains indices [0, N/2)
														
 
															-and the second contains indices [N/2, N), and multiplying the two halves
														
 
															-together element-wise to produce N/2 values. This is repeated until only a
														
 
															-single element remains. This order may be faster to compute than the ordered
														
 
															-product for floats, as it can often be better parallelized.
														
 
															-
														
 
															-The order of the product may be important for accounting for precision errors
														
 
															-in floating-point computation, as floating-point multiplication is not
														
 
															-associative, that is `(a*b)*c` may not be equal to `a*(b*c)`.
														
 
															-
														
 
															-Inputs:
														
 
															-- `v`: The vector to reduce.
														
 
															-
														
 
															-Result:
														
 
															-- Product of all lanes, as a scalar.
														
 
															-
														
 
															-**Operation**:
														
 
															-
														
 
															-	for n > 1 {
														
 
															-		n = n / 2
														
 
															-		for i in 0 ..< n {
														
 
															-			a[i] *= a[i+n]
														
 
															-		}
														
 
															-	}
														
 
															-	res := a[0]
														
 
															-
														
 
															-Graphical representation of the operation for N=4:
														
 
															-
														
 
															-	     +-----------------------+
														
 
															-	     | v0  | v1  | v2  | v3  |
														
 
															-	     +-----------------------+
														
 
															-	        |     |     |     |
														
 
															-	       [x]<-- | ---'      |
														
 
															-	        |    [x]<--------'
														
 
															-	        |     |
														
 
															-	        `>[x]<'
														
 
															-	           |
														
 
															-	           v
														
 
															-	        +-----+
														
 
															-	result: | y0  |
														
 
															-	        +-----+
														
 
															-*/
														
 
															-reduce_mul_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
														
 
															-	where intrinsics.type_is_numeric(E) {
														
 
															-	when N == 64 { v64 := v }
														
 
															-	when N == 32 { v32 := v }
														
 
															-	when N == 16 { v16 := v }
														
 
															-	when N == 8  { v8 := v }
														
 
															-	when N == 4  { v4 := v }
														
 
															-	when N == 2  { v2 := v }
														
 
															-
														
 
															-	when N >= 64 {
														
 
															-		x32 := swizzle(v64,
														
 
															-			0,  1,  2,  3,  4,  5,  6,  7,
														
 
															-			8,  9,  10, 11, 12, 13, 14, 15,
														
 
															-			16, 17, 18, 19, 20, 21, 22, 23,
														
 
															-			24, 25, 26, 27, 28, 29, 30, 31)
														
 
															-		y32 := swizzle(v64,
														
 
															-			32, 33, 34, 35, 36, 37, 38, 39,
														
 
															-			40, 41, 42, 43, 44, 45, 46, 47,
														
 
															-			48, 49, 50, 51, 52, 53, 54, 55,
														
 
															-			56, 57, 58, 59, 60, 61, 62, 63)
														
 
															-		v32 := x32 * y32
														
 
															-	}
														
 
															-
														
 
															-	when N >= 32 {
														
 
															-		x16 := swizzle(v32,
														
 
															-			0,  1,  2,  3,  4,  5,  6,  7,
														
 
															-			8,  9,  10, 11, 12, 13, 14, 15)
														
 
															-		y16 := swizzle(v32,
														
 
															-			16, 17, 18, 19, 20, 21, 22, 23,
														
 
															-			24, 25, 26, 27, 28, 29, 30, 31)
														
 
															-		v16 := x16 * y16
														
 
															-	}
														
 
															-
														
 
															-	when N >= 16 {
														
 
															-		x8 := swizzle(v16, 0, 1, 2,  3,  4,  5,  6,  7)
														
 
															-		y8 := swizzle(v16, 8, 9, 10, 11, 12, 13, 14, 15)
														
 
															-		v8 := x8 * y8
														
 
															-	}
														
 
															-
														
 
															-	when N >= 8 {
														
 
															-		x4 := swizzle(v8, 0, 1, 2, 3)
														
 
															-		y4 := swizzle(v8, 4, 5, 6, 7)
														
 
															-		v4 := x4 * y4
														
 
															-	}
														
 
															-
														
 
															-	when N >= 4 {
														
 
															-		x2 := swizzle(v4, 0, 1)
														
 
															-		y2 := swizzle(v4, 2, 3)
														
 
															-		v2 := x2 * y2
														
 
															-	}
														
 
															-
														
 
															-	when N >= 2 {
														
 
															-		return extract(v2, 0) * extract(v2, 1)
														
 
															-	} else {
														
 
															-		return extract(v, 0)
														
 
															-	}
														
 
															-}
														
 
															-
														
 
															+indices :: intrinsics.simd_indices
														
--- a/core/sys/windows/user32.odin
+++ b/core/sys/windows/user32.odin
@@ -47,6 +47,8 @@ foreign user32 {
 
															 		lpParam: LPVOID,
														
 
															 	) -> HWND ---
														
 
															+	GetWindowThreadProcessId :: proc(hwnd: HWND, lpdwProcessId: LPDWORD) -> DWORD ---
														
 
															+
														
 
															 	DestroyWindow :: proc(hWnd: HWND) -> BOOL ---
														
 
															 	ShowWindow :: proc(hWnd: HWND, nCmdShow: INT) -> BOOL ---
														
--- a/src/check_builtin.cpp
+++ b/src/check_builtin.cpp
@@ -760,6 +760,36 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan
 
															 			return true;
														
 
															 		}
														
 
															+	case BuiltinProc_simd_indices:
														
 
															+		{
														
 
															+			Operand x = {};
														
 
															+			check_expr_or_type(c, &x, ce->args[0], nullptr);
														
 
															+			if (x.mode == Addressing_Invalid) return false;
														
 
															+			if (x.mode != Addressing_Type) {
														
 
															+				gbString s = expr_to_string(x.expr);
														
 
															+				error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s);
														
 
															+				gb_string_free(s);
														
 
															+				return false;
														
 
															+			}
														
 
															+			if (!is_type_simd_vector(x.type)) {
														
 
															+				gbString s = type_to_string(x.type);
														
 
															+				error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s);
														
 
															+				gb_string_free(s);
														
 
															+				return false;
														
 
															+			}
														
 
															+
														
 
															+			Type *elem = base_array_type(x.type);
														
 
															+			if (!is_type_numeric(elem)) {
														
 
															+				gbString s = type_to_string(x.type);
														
 
															+				error(x.expr, "'%.*s' expected a simd vector type with a numeric element type, got '%s'", LIT(builtin_name), s);
														
 
															+				gb_string_free(s);
														
 
															+			}
														
 
															+
														
 
															+			operand->mode = Addressing_Value;
														
 
															+			operand->type = x.type;
														
 
															+			return true;
														
 
															+		}
														
 
															+
														
 
															 	case BuiltinProc_simd_extract:
														
 
															 		{
														
 
															 			Operand x = {};
														
@@ -2059,6 +2089,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 
															 	case BuiltinProc_atomic_type_is_lock_free:
														
 
															 	case BuiltinProc_has_target_feature:
														
 
															 	case BuiltinProc_procedure_of:
														
 
															+	case BuiltinProc_simd_indices:
														
 
															 		// NOTE(bill): The first arg may be a Type, this will be checked case by case
														
 
															 		break;
														
@@ -6001,12 +6032,13 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 
															 			// NOTE(jakubtomsu): forces calculation of variant_block_size
														
 
															 			type_size_of(u);
														
 
															-			i64 tag_offset = u->Union.variant_block_size;
														
 
															-			GB_ASSERT(tag_offset > 0);
														
 
															+			// NOTE(Jeroen): A tag offset of zero is perfectly fine if all members of the union are empty structs.
														
 
															+			//               What matters is that the tag size is > 0.
														
 
															+			GB_ASSERT(u->Union.tag_size > 0);
														
 
															 			operand->mode = Addressing_Constant;
														
 
															 			operand->type = t_untyped_integer;
														
 
															-			operand->value = exact_value_i64(tag_offset);
														
 
															+			operand->value = exact_value_i64(u->Union.variant_block_size);
														
 
															 		}
														
 
															 		break;
														
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -2910,9 +2910,20 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
 
															 		if (!defined) {
														
 
															 			gbString xs = type_to_string(x->type, temporary_allocator());
														
 
															 			gbString ys = type_to_string(y->type, temporary_allocator());
														
 
															-			err_str = gb_string_make(temporary_allocator(),
														
 
															-				gb_bprintf("operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys)
														
 
															-			);
														
 
															+
														
 
															+			if (!is_type_comparable(x->type)) {
														
 
															+				err_str = gb_string_make(temporary_allocator(),
														
 
															+					gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", xs, LIT(token_strings[op]))
														
 
															+				);
														
 
															+			} else if (!is_type_comparable(y->type)) {
														
 
															+				err_str = gb_string_make(temporary_allocator(),
														
 
															+					gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", ys, LIT(token_strings[op]))
														
 
															+				);
														
 
															+			} else {
														
 
															+				err_str = gb_string_make(temporary_allocator(),
														
 
															+					gb_bprintf("Operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys)
														
 
															+				);
														
 
															+			}
														
 
															 		} else {
														
 
															 			Type *comparison_type = x->type;
														
 
															 			if (x->type == err_type && is_operand_nil(*x)) {
														
@@ -2933,11 +2944,11 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
 
															 		} else {
														
 
															 			yt = type_to_string(y->type);
														
 
															 		}
														
 
															-		err_str = gb_string_make(temporary_allocator(), gb_bprintf("mismatched types '%s' and '%s'", xt, yt));
														
 
															+		err_str = gb_string_make(temporary_allocator(), gb_bprintf("Mismatched types '%s' and '%s'", xt, yt));
														
 
															 	}
														
 
															 	if (err_str != nullptr) {
														
 
															-		error(node, "Cannot compare expression, %s", err_str);
														
 
															+		error(node, "Cannot compare expression. %s.", err_str);
														
 
															 		x->type = t_untyped_bool;
														
 
															 	} else {
														
 
															 		if (x->mode == Addressing_Constant &&
														
--- a/src/checker_builtin_procs.hpp
+++ b/src/checker_builtin_procs.hpp
@@ -205,6 +205,9 @@ BuiltinProc__simd_begin,
 
															 	BuiltinProc_simd_masked_expand_load,
														
 
															 	BuiltinProc_simd_masked_compress_store,
														
 
															+	BuiltinProc_simd_indices,
														
 
															+
														
 
															+
														
 
															 	// Platform specific SIMD intrinsics
														
 
															 	BuiltinProc_simd_x86__MM_SHUFFLE,
														
 
															 BuiltinProc__simd_end,
														
@@ -551,6 +554,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 
															 	{STR_LIT("simd_masked_expand_load"),    3, false, Expr_Expr, BuiltinProcPkg_intrinsics},
														
 
															 	{STR_LIT("simd_masked_compress_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics},
														
 
															+	{STR_LIT("simd_indices"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
														
 
															+
														
 
															 	{STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics},
														
 
															 	{STR_LIT(""), 0, false, Expr_Stmt, BuiltinProcPkg_intrinsics},
														
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -1293,6 +1293,23 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
 
															 	lbValue res = {};
														
 
															 	res.type = tv.type;
														
 
															+	switch (builtin_id) {
														
 
															+	case BuiltinProc_simd_indices: {
														
 
															+		Type *type = base_type(res.type);
														
 
															+		GB_ASSERT(type->kind == Type_SimdVector);
														
 
															+		Type *elem = type->SimdVector.elem;
														
 
															+
														
 
															+		i64 count = type->SimdVector.count;
														
 
															+		LLVMValueRef *scalars = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
														
 
															+		for (i64 i = 0; i < count; i++) {
														
 
															+			scalars[i] = lb_const_value(m, elem, exact_value_i64(i)).value;
														
 
															+		}
														
 
															+
														
 
															+		res.value = LLVMConstVector(scalars, cast(unsigned)count);
														
 
															+		return res;
														
 
															+	}
														
 
															+	}
														
 
															+
														
 
															 	lbValue arg0 = {}; if (ce->args.count > 0) arg0 = lb_build_expr(p, ce->args[0]);
														
 
															 	lbValue arg1 = {}; if (ce->args.count > 1) arg1 = lb_build_expr(p, ce->args[1]);
														
 
															 	lbValue arg2 = {}; if (ce->args.count > 2) arg2 = lb_build_expr(p, ce->args[2]);
														
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -4108,10 +4108,10 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
 
															 		}
														
 
															 		i64 max = 0;
														
 
															-		i64 field_size = 0;
														
 
															 		for_array(i, t->Union.variants) {
														
 
															 			Type *variant_type = t->Union.variants[i];
														
 
															+
														
 
															 			i64 size = type_size_of_internal(variant_type, path);
														
 
															 			if (max < size) {
														
 
															 				max = size;
														
@@ -4130,7 +4130,7 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
 
															 			size = align_formula(max, tag_size);
														
 
															 			// NOTE(bill): Calculate the padding between the common fields and the tag
														
 
															 			t->Union.tag_size = cast(i16)tag_size;
														
 
															-			t->Union.variant_block_size = size - field_size;
														
 
															+			t->Union.variant_block_size = size;
														
 
															 			size += tag_size;
														
 
															 		}