Browse Source

Merge pull request #3526 from laytan/target-features

Improve target features support
gingerBill 1 year ago
parent
commit
15f7148eae

+ 3 - 1
.gitignore

@@ -322,4 +322,6 @@ build.sh
 !core/debug/
 
 # RAD debugger project file
-*.raddbg
+*.raddbg
+
+misc/featuregen/featuregen

+ 8 - 2
base/intrinsics/intrinsics.odin

@@ -282,6 +282,12 @@ simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
 simd_rotate_left  :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
 simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
 
+// Checks if the current target supports the given target features.
+//
+// Takes a constant comma-seperated string (eg: "sha512,sse4.1"), or a procedure type which has either
+// `@(require_target_feature)` or `@(enable_target_feature)` as its input and returns a boolean indicating
+// if all listed features are supported.
+has_target_feature :: proc($test: $T) -> bool where type_is_string(T) || type_is_proc(T) ---
 
 // WASM targets only
 wasm_memory_grow :: proc(index, delta: uintptr) -> int ---
@@ -293,9 +299,9 @@ wasm_memory_size :: proc(index: uintptr)        -> int ---
 // 0 - indicates that the thread blocked and then was woken up
 // 1 - the loaded value from `ptr` did not match `expected`, the thread did not block
 // 2 - the thread blocked, but the timeout
-@(enable_target_feature="atomics")
+@(require_target_feature="atomics")
 wasm_memory_atomic_wait32   :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -> u32 ---
-@(enable_target_feature="atomics")
+@(require_target_feature="atomics")
 wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) ---
 
 // x86 Targets (i386, amd64)

+ 2 - 2
core/simd/x86/sse3.odin

@@ -36,7 +36,7 @@ _mm_lddqu_si128 :: #force_inline proc "c" (mem_addr: ^__m128i) -> __m128i {
 _mm_movedup_pd :: #force_inline proc "c" (a: __m128d) -> __m128d {
 	return simd.shuffle(a, a, 0, 0)
 }
-@(require_results, enable_target_feature="sse3")
+@(require_results, enable_target_feature="sse2,sse3")
 _mm_loaddup_pd :: #force_inline proc "c" (mem_addr: [^]f64) -> __m128d {
 	return _mm_load1_pd(mem_addr)
 }
@@ -65,4 +65,4 @@ foreign _ {
 	hsubps :: proc(a, b: __m128) -> __m128 ---
 	@(link_name = "llvm.x86.sse3.ldu.dq")
 	lddqu :: proc(mem_addr: rawptr) -> i8x16 ---
-}
+}

+ 2 - 2
core/simd/x86/sse41.odin

@@ -268,7 +268,7 @@ _mm_testnzc_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 {
 _mm_test_all_zeros :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 {
 	return _mm_testz_si128(a, mask)
 }
-@(require_results, enable_target_feature="sse4.1")
+@(require_results, enable_target_feature="sse2,sse4.1")
 _mm_test_all_ones :: #force_inline proc "c" (a: __m128i) -> i32 {
 	return _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
 }
@@ -349,4 +349,4 @@ foreign _ {
 	ptestc     :: proc(a, mask: i64x2) -> i32 ---
 	@(link_name = "llvm.x86.sse41.ptestnzc")
 	ptestnzc   :: proc(a, mask: i64x2) -> i32 ---
-}
+}

+ 31 - 13
core/sync/futex_wasm.odin

@@ -5,31 +5,49 @@ package sync
 import "base:intrinsics"
 import "core:time"
 
+// NOTE: because `core:sync` is in the dependency chain of a lot of the core packages (mostly through `core:mem`)
+// without actually calling into it much, I opted for a runtime panic instead of a compile error here.
+
 _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool {
-	s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1)
-	return s != 0
+	when !intrinsics.has_target_feature("atomics") {
+		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+	} else {
+		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1)
+		return s != 0
+	}
 }
 
 _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
-	s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration))
-	return s != 0
-
+	when !intrinsics.has_target_feature("atomics") {
+		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+	} else {
+		s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration))
+		return s != 0
+	}
 }
 
 _futex_signal :: proc "contextless" (f: ^Futex) {
-	loop: for {
-		s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1)
-		if s >= 1 {
-			return
+	when !intrinsics.has_target_feature("atomics") {
+		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+	} else {
+		loop: for {
+			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1)
+			if s >= 1 {
+				return
+			}
 		}
 	}
 }
 
 _futex_broadcast :: proc "contextless" (f: ^Futex) {
-	loop: for {
-		s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0))
-		if s >= 0 {
-			return
+	when !intrinsics.has_target_feature("atomics") {
+		_panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it")
+	} else {
+		loop: for {
+			s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0))
+			if s >= 0 {
+				return
+			}
 		}
 	}
 }

+ 28 - 0
misc/featuregen/README.md

@@ -0,0 +1,28 @@
+# Featuregen
+
+This directory contains a python and CPP script that generates the needed information
+for features regarding microarchitecture and target features of the compiler.
+
+It is not pretty! But LLVM has no way to query this information with their C API.
+
+It generates these globals (intended for `src/build_settings.cpp`:
+
+- `target_microarch_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of microarchitectures available on that architecture
+- `target_features_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of target features available on that architecture
+- `target_microarch_counts`: an array of ints indexed by the architecture, each int represents the amount of microarchitectures available on that target, intended for easier iteration of the next global
+- `microarch_features_list`: an array of a tuple like struct where the first string is a microarchitecture and the second is a comma-seperated list of all features that are enabled by default for it
+
+In order to get the default features for a microarchitecture there is a small CPP program that takes
+a target triple and microarchitecture and spits out the default features, this is then parsed by the python script.
+
+This should be ran each time we update LLVM to stay in sync.
+
+If there are minor differences (like the Odin user using LLVM 14 and this table being generated on LLVM 17) it
+does not impact much at all, the only thing it will do is make LLVM print a message that the feature is ignored (if it was added between 14 and 17 in this case).
+
+## Usage
+
+1. Make sure the table of architectures at the top of the python script is up-to-date (the triple can be any valid triple for the architecture)
+1. `./build.sh`
+1. `python3 featuregen.py`
+1. Copy the output into `src/build_settings.cpp`

+ 37 - 0
misc/featuregen/featuregen.cpp

@@ -0,0 +1,37 @@
+#include <llvm/MC/MCSubtargetInfo.h>
+#include <llvm/MC/TargetRegistry.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/ADT/ArrayRef.h>
+#include <llvm/Support/InitLLVM.h>
+#include <llvm/Support/TargetSelect.h>
+
+// Dumps the default set of supported features for the given microarch.
+int main(int argc, char **argv) {
+	if (argc < 3) {
+		llvm::errs() << "Error: first arg should be triple, second should be microarch\n";
+		return 1;
+	}
+
+	llvm::InitializeAllTargets();
+	llvm::InitializeAllTargetMCs();
+
+	std::string error;
+	const llvm::Target* target = llvm::TargetRegistry::lookupTarget(argv[1], error);
+
+	if (!target) {
+		llvm::errs() << "Error: " << error << "\n";
+		return 1;
+	}
+
+	auto STI = target->createMCSubtargetInfo(argv[1], argv[2], "");
+
+	std::string plus = "+";
+	llvm::ArrayRef<llvm::SubtargetFeatureKV> features = STI->getAllProcessorFeatures();
+	for (const auto& feature : features) {
+		if (STI->checkFeatures(plus + feature.Key)) {
+			llvm::outs() << feature.Key << "\n";
+		}
+	}
+
+	return 0;
+}

+ 116 - 0
misc/featuregen/featuregen.py

@@ -0,0 +1,116 @@
+import subprocess
+import tempfile
+import os
+import sys
+
+archs = [
+	("amd64",     "linux_amd64", "x86_64-pc-linux-gnu", [], []),
+	("i386",      "linux_i386",  "i386-pc-linux-gnu",   [], []),
+	("arm32",     "linux_arm32", "arm-linux-gnu",       [], []),
+	("arm64",     "linux_arm64", "aarch64-linux-elf",   [], []),
+	("wasm32",    "js_wasm32",   "wasm32-js-js",        [], []),
+	("wasm64p32", "js_wasm64p32","wasm32-js-js",        [], []),
+];
+
+SEEKING_CPUS     = 0
+PARSING_CPUS     = 1
+PARSING_FEATURES = 2
+
+with tempfile.NamedTemporaryFile(suffix=".odin", delete=True) as temp_file:
+	temp_file.write(b"package main\n")
+
+	for arch, target, triple, cpus, features in archs:
+		cmd = ["odin", "build", temp_file.name, "-file", "-build-mode:llvm", "-out:temp", "-target-features:\"help\"", f"-target:\"{target}\""]
+		process = subprocess.Popen(cmd, stderr=subprocess.PIPE, text=True)
+
+		state = SEEKING_CPUS
+		for line in process.stderr:
+
+			if state == SEEKING_CPUS:
+				if line == "Available CPUs for this target:\n":
+					state = PARSING_CPUS
+			
+			elif state == PARSING_CPUS:
+				if line == "Available features for this target:\n":
+					state = PARSING_FEATURES
+					continue
+			
+				parts = line.split(" -", maxsplit=1)
+				if len(parts) < 2:
+					continue
+
+				cpu = parts[0].strip()
+				cpus.append(cpu)
+
+			elif state == PARSING_FEATURES:
+				if line == "\n" and len(features) > 0:
+					break
+
+				parts = line.split(" -", maxsplit=1)
+				if len(parts) < 2:
+					continue
+
+				feature = parts[0].strip()
+				features.append(feature)
+
+		process.wait()
+		if process.returncode != 0:
+			print(f"odin build returned with non-zero exit code {process.returncode}")
+			sys.exit(1)
+
+		os.remove("temp.ll")
+
+def print_default_features(triple, microarch):
+	cmd = ["./featuregen", triple, microarch]
+	process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True)
+	first = True
+	for line in process.stdout:
+		print("" if first else ",", line.strip(), sep="", end="")
+		first = False
+	process.wait()
+	if process.returncode != 0:
+		print(f"featuregen returned with non-zero exit code {process.returncode}")
+		sys.exit(1)
+
+print("// Generated with the featuregen script in `misc/featuregen`")
+print("gb_global String target_microarch_list[TargetArch_COUNT] = {")
+print("\t// TargetArch_Invalid:")
+print('\tstr_lit(""),')
+for arch, target, triple, cpus, features in archs:
+	print(f"\t// TargetArch_{arch}:")
+	print(f'\tstr_lit("{','.join(cpus)}"),')
+print("};")
+
+print("")
+
+print("// Generated with the featuregen script in `misc/featuregen`")
+print("gb_global String target_features_list[TargetArch_COUNT] = {")
+print("\t// TargetArch_Invalid:")
+print('\tstr_lit(""),')
+for arch, target, triple, cpus, features in archs:
+	print(f"\t// TargetArch_{arch}:")
+	print(f'\tstr_lit("{','.join(features)}"),')
+print("};")
+
+print("")
+
+print("// Generated with the featuregen script in `misc/featuregen`")
+print("gb_global int target_microarch_counts[TargetArch_COUNT] = {")
+print("\t// TargetArch_Invalid:")
+print("\t0,")
+for arch, target, triple, cpus, feature in archs:
+	print(f"\t// TargetArch_{arch}:")
+	print(f"\t{len(cpus)},")
+print("};")
+
+print("")
+
+print("// Generated with the featuregen script in `misc/featuregen`")
+print("gb_global MicroarchFeatureList microarch_features_list[] = {")
+for arch, target, triple, cpus, features in archs:
+	print(f"\t// TargetArch_{arch}:")
+	for cpu in cpus:
+		print(f'\t{{ str_lit("{cpu}"), str_lit("', end="")
+		print_default_features(triple, cpu)
+		print('") },')
+print("};")

File diff suppressed because it is too large
+ 33 - 14
src/build_settings.cpp


+ 44 - 2
src/check_builtin.cpp

@@ -1719,6 +1719,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 	case BuiltinProc_objc_register_selector: 
 	case BuiltinProc_objc_register_class: 
 	case BuiltinProc_atomic_type_is_lock_free:
+	case BuiltinProc_has_target_feature:
 		// NOTE(bill): The first arg may be a Type, this will be checked case by case
 		break;
 
@@ -3663,6 +3664,41 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 		break;
 	}
 
+	case BuiltinProc_has_target_feature: {
+		String features = str_lit("");
+
+		check_expr_or_type(c, operand, ce->args[0]);
+
+		if (is_type_string(operand->type) && operand->mode == Addressing_Constant) {
+			GB_ASSERT(operand->value.kind == ExactValue_String);
+			features = operand->value.value_string;
+		} else {
+			Type *pt = base_type(operand->type);
+			if (pt->kind == Type_Proc) {
+				if (pt->Proc.require_target_feature.len != 0) {
+					GB_ASSERT(pt->Proc.enable_target_feature.len == 0);
+					features = pt->Proc.require_target_feature;
+				} else if (pt->Proc.enable_target_feature.len != 0) {
+					features = pt->Proc.enable_target_feature;
+				} else {
+					error(ce->args[0], "Expected the procedure type given to '%.*s' to have @(require_target_feature=\"...\") or @(enable_target_feature=\"...\")", LIT(builtin_name));
+				}
+			} else {
+				error(ce->args[0], "Expected a constant string or procedure type for '%.*s'", LIT(builtin_name));
+			}
+		}
+
+		String invalid;
+		if (!check_target_feature_is_valid_globally(features, &invalid)) {
+			error(ce->args[0], "Target feature '%.*s' is not a valid target feature", LIT(invalid));
+		}
+
+		operand->value = exact_value_bool(check_target_feature_is_enabled(features, nullptr));
+		operand->mode = Addressing_Constant;
+		operand->type = t_untyped_bool;
+		break;
+	}
+
 	case BuiltinProc_soa_struct: {
 		Operand x = {};
 		Operand y = {};
@@ -6014,7 +6050,10 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 				return false;
 			}
 
-			enable_target_feature({}, str_lit("atomics"));
+			if (!check_target_feature_is_enabled(str_lit("atomics"), nullptr)) {
+				error(call, "'%.*s' requires target feature 'atomics' to be enabled, enable it with -target-features:\"atomics\" or choose a different -microarch", LIT(builtin_name));
+				return false;
+			}
 
 			Operand ptr = {};
 			Operand expected = {};
@@ -6068,7 +6107,10 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 				return false;
 			}
 
-			enable_target_feature({}, str_lit("atomics"));
+			if (!check_target_feature_is_enabled(str_lit("atomics"), nullptr)) {
+				error(call, "'%.*s' requires target feature 'atomics' to be enabled, enable it with -target-features:\"atomics\" or choose a different -microarch", LIT(builtin_name));
+				return false;
+			}
 
 			Operand ptr = {};
 			Operand waiters = {};

+ 39 - 10
src/check_decl.cpp

@@ -886,17 +886,37 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) {
 
 	check_objc_methods(ctx, e, ac);
 
-	if (ac.require_target_feature.len != 0 && ac.enable_target_feature.len != 0) {
-		error(e->token, "Attributes @(require_target_feature=...) and @(enable_target_feature=...) cannot be used together");
-	} else if (ac.require_target_feature.len != 0) {
-		if (check_target_feature_is_enabled(e->token.pos, ac.require_target_feature)) {
-			e->Procedure.target_feature = ac.require_target_feature;
-		} else {
-			e->Procedure.target_feature_disabled = true;
+	{
+		if (ac.require_target_feature.len != 0 && ac.enable_target_feature.len != 0) {
+			error(e->token, "A procedure cannot have both @(require_target_feature=\"...\") and @(enable_target_feature=\"...\")");
+		}
+
+		if (build_context.strict_target_features && ac.enable_target_feature.len != 0) {
+			ac.require_target_feature = ac.enable_target_feature;
+			ac.enable_target_feature.len = 0;
+		}
+
+		if (ac.require_target_feature.len != 0) {
+			pt->require_target_feature = ac.require_target_feature;
+			String invalid;
+			if (!check_target_feature_is_valid_globally(ac.require_target_feature, &invalid)) {
+				error(e->token, "Required target feature '%.*s' is not a valid target feature", LIT(invalid));
+			} else if (!check_target_feature_is_enabled(ac.require_target_feature, nullptr)) {
+				e->flags |= EntityFlag_Disabled;
+			}
+		} else if (ac.enable_target_feature.len != 0) {
+
+			// NOTE: disallow wasm, features on that arch are always global to the module.
+			if (is_arch_wasm()) {
+				error(e->token, "@(enable_target_feature=\"...\") is not allowed on wasm, features for wasm must be declared globally");
+			}
+
+			pt->enable_target_feature = ac.enable_target_feature;
+			String invalid;
+			if (!check_target_feature_is_valid_globally(ac.enable_target_feature, &invalid)) {
+				error(e->token, "Procedure enabled target feature '%.*s' is not a valid target feature", LIT(invalid));
+			}
 		}
-	} else if (ac.enable_target_feature.len != 0) {
-		enable_target_feature(e->token.pos, ac.enable_target_feature);
-		e->Procedure.target_feature = ac.enable_target_feature;
 	}
 
 	switch (e->Procedure.optimization_mode) {
@@ -1370,6 +1390,10 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D
 			continue;
 		}
 
+		if (p->flags & EntityFlag_Disabled) {
+			continue;
+		}
+
 		String name = p->token.string;
 
 		for (isize k = j+1; k < pge->entities.count; k++) {
@@ -1387,6 +1411,10 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D
 
 			ERROR_BLOCK();
 
+			if (q->flags & EntityFlag_Disabled) {
+				continue;
+			}
+
 			ProcTypeOverloadKind kind = are_proc_types_overload_safe(p->type, q->type);
 			bool both_have_where_clauses = false;
 			if (p->decl_info->proc_lit != nullptr && q->decl_info->proc_lit != nullptr) {
@@ -1423,6 +1451,7 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D
 				break;
 			case ProcOverload_ParamCount:
 			case ProcOverload_ParamTypes:
+			case ProcOverload_TargetFeatures:
 				// This is okay :)
 				break;
 

+ 67 - 1
src/check_expr.cpp

@@ -6528,12 +6528,17 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c,
 		array_add(&proc_entities, proc);
 	}
 
+	int max_matched_features = 0;
 
 	gbString expr_name = expr_to_string(operand->expr);
 	defer (gb_string_free(expr_name));
 
 	for_array(i, procs) {
 		Entity *p = procs[i];
+		if (p->flags & EntityFlag_Disabled) {
+			continue;
+		}
+
 		Type *pt = base_type(p->type);
 		if (pt != nullptr && is_type_proc(pt)) {
 			CallArgumentData data = {};
@@ -6564,11 +6569,24 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c,
 				item.score += assign_score_function(1);
 			}
 
+			max_matched_features = gb_max(max_matched_features, matched_target_features(&pt->Proc));
+
 			item.index = index;
 			array_add(&valids, item);
 		}
 	}
 
+	if (max_matched_features > 0) {
+		for_array(i, valids) {
+			Entity *p = procs[valids[i].index];
+			Type *t = base_type(p->type);
+			GB_ASSERT(t->kind == Type_Proc);
+
+			int matched = matched_target_features(&t->Proc);
+			valids[i].score += assign_score_function(max_matched_features-matched);
+		}
+	}
+
 	if (valids.count > 1) {
 		array_sort(valids, valid_index_and_score_cmp);
 		i64 best_score = valids[0].score;
@@ -6710,7 +6728,11 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c,
 		ERROR_BLOCK();
 
 		error(operand->expr, "Ambiguous procedure group call '%s' that match with the given arguments", expr_name);
-		print_argument_types();
+		if (positional_operands.count == 0 && named_operands.count == 0) {
+			error_line("\tNo given arguments\n");
+		} else {
+			print_argument_types();
+		}
 
 		for (auto const &valid : valids) {
 			Entity *proc = proc_entities[valid.index];
@@ -7555,8 +7577,11 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c
 		}
 	}
 
+	bool is_call_inlined = false;
+
 	switch (inlining) {
 	case ProcInlining_inline:
+		is_call_inlined = true;
 		if (proc != nullptr) {
 			Entity *e = entity_from_expr(proc);
 			if (e != nullptr && e->kind == Entity_Procedure) {
@@ -7572,6 +7597,47 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c
 		break;
 	case ProcInlining_no_inline:
 		break;
+	case ProcInlining_none:
+		if (proc != nullptr) {
+			Entity *e = entity_from_expr(proc);
+			if (e != nullptr && e->kind == Entity_Procedure) {
+				DeclInfo *decl = e->decl_info;
+				if (decl->proc_lit) {
+					ast_node(pl, ProcLit, decl->proc_lit);
+					if (pl->inlining == ProcInlining_inline) {
+						is_call_inlined = true;
+					}
+				}
+			}
+		}
+	}
+
+	{
+		String invalid;
+		if (pt->kind == Type_Proc && pt->Proc.require_target_feature.len != 0) {
+			if (!check_target_feature_is_valid_for_target_arch(pt->Proc.require_target_feature, &invalid)) {
+				error(call, "Called procedure requires target feature '%.*s' which is invalid for the build target", LIT(invalid));
+			} else if (!check_target_feature_is_enabled(pt->Proc.require_target_feature, &invalid)) {
+				error(call, "Calling this procedure requires target feature '%.*s' to be enabled", LIT(invalid));
+			}
+		}
+
+		if (pt->kind == Type_Proc && pt->Proc.enable_target_feature.len != 0) {
+			if (!check_target_feature_is_valid_for_target_arch(pt->Proc.enable_target_feature, &invalid)) {
+				error(call, "Called procedure enables target feature '%.*s' which is invalid for the build target", LIT(invalid));
+			}
+
+			// NOTE: Due to restrictions in LLVM you can not inline calls with a superset of features.
+			if (is_call_inlined) {
+				GB_ASSERT(c->curr_proc_decl);
+				GB_ASSERT(c->curr_proc_decl->entity);
+				GB_ASSERT(c->curr_proc_decl->entity->type->kind == Type_Proc);
+				String scope_features = c->curr_proc_decl->entity->type->Proc.enable_target_feature;
+				if (!check_target_feature_is_superset_of(scope_features, pt->Proc.enable_target_feature, &invalid)) {
+					error(call, "Inlined procedure enables target feature '%.*s', this requires the calling procedure to at least enable the same feature", LIT(invalid));
+				}
+			}
+		}
 	}
 
 	operand->expr = call;

+ 4 - 0
src/checker_builtin_procs.hpp

@@ -44,6 +44,8 @@ enum BuiltinProcId {
 	// "Intrinsics"
 	BuiltinProc_is_package_imported,
 
+	BuiltinProc_has_target_feature,
+
 	BuiltinProc_transpose,
 	BuiltinProc_outer_product,
 	BuiltinProc_hadamard_product,
@@ -354,6 +356,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 	// "Intrinsics"
 	{STR_LIT("is_package_imported"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 
+	{STR_LIT("has_target_feature"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
+
 	{STR_LIT("transpose"),        1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("outer_product"),    2, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics},

+ 1 - 3
src/entity.cpp

@@ -252,10 +252,8 @@ struct Entity {
 			bool    is_foreign                 : 1;
 			bool    is_export                  : 1;
 			bool    generated_from_polymorphic : 1;
-			bool    target_feature_disabled    : 1;
 			bool    entry_point_only           : 1;
 			bool    has_instrumentation        : 1;
-			String  target_feature;
 		} Procedure;
 		struct {
 			Array<Entity *> entities;
@@ -502,4 +500,4 @@ gb_internal bool is_entity_local_variable(Entity *e) {
 
 	return ((e->scope->flags &~ ScopeFlag_ContextDefined) == 0) ||
 	       (e->scope->flags & ScopeFlag_Proc) != 0;
-}
+}

+ 45 - 59
src/llvm_backend.cpp

@@ -41,6 +41,37 @@ String get_default_microarchitecture() {
 	return default_march;
 }
 
+String get_final_microarchitecture() {
+	BuildContext *bc = &build_context;
+
+	String microarch = bc->microarch;
+	if (microarch.len == 0) {
+		microarch = get_default_microarchitecture();
+	} else if (microarch == str_lit("native")) {
+		microarch = make_string_c(LLVMGetHostCPUName());
+	}
+	return microarch;
+}
+
+gb_internal String get_default_features() {
+	BuildContext *bc = &build_context;
+
+	int off = 0;
+	for (int i = 0; i < bc->metrics.arch; i += 1) {
+		off += target_microarch_counts[i];
+	}
+
+	String microarch = get_final_microarchitecture();
+	for (int i = off; i < off+target_microarch_counts[bc->metrics.arch]; i += 1) {
+		if (microarch_features_list[i].microarch == microarch) {
+			return microarch_features_list[i].features;
+		}
+	}
+
+	GB_PANIC("unknown microarch");
+	return {};
+}
+
 gb_internal void lb_add_foreign_library_path(lbModule *m, Entity *e) {
 	if (e == nullptr) {
 		return;
@@ -2468,69 +2499,24 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
 		code_mode = LLVMCodeModelKernel;
 	}
 
-	String      host_cpu_name = copy_string(permanent_allocator(), make_string_c(LLVMGetHostCPUName()));
-	String      llvm_cpu      = get_default_microarchitecture();
-	char const *llvm_features = "";
-	if (build_context.microarch.len != 0) {
-		if (build_context.microarch == "native") {
-			llvm_cpu = host_cpu_name;
-		} else {
-			llvm_cpu = copy_string(permanent_allocator(), build_context.microarch);
-		}
-		if (llvm_cpu == host_cpu_name) {
-			llvm_features = LLVMGetHostCPUFeatures();
+	String llvm_cpu = get_final_microarchitecture();
+
+	gbString llvm_features = gb_string_make(temporary_allocator(), "");
+	String_Iterator it = {build_context.target_features_string, 0};
+	bool first = true;
+	for (;;) {
+		String str = string_split_iterator(&it, ',');
+		if (str == "") break;
+		if (!first) {
+			llvm_features = gb_string_appendc(llvm_features, ",");
 		}
-	}
+		first = false;
 
-	// NOTE(Jeroen): Uncomment to get the list of supported microarchitectures.
-	/*
-	if (build_context.microarch == "?") {
-		string_set_add(&build_context.target_features_set, str_lit("+cpuhelp"));
+		llvm_features = gb_string_appendc(llvm_features, "+");
+		llvm_features = gb_string_append_length(llvm_features, str.text, str.len);
 	}
-	*/
 
-	if (build_context.target_features_set.entries.count != 0) {
-		// Prefix all of the features with a `+`, because we are
-		// enabling additional features.
-		char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true);
-
-		String f_string = make_string_c(llvm_features);
-		String a_string = make_string_c(additional_features);
-		isize f_len = f_string.len;
-
-		if (f_len == 0) {
-			// The common case is that llvm_features is empty, so
-			// the target_features_set additions can be used as is.
-			llvm_features = additional_features;
-		} else {
-			// The user probably specified `-microarch:native`, so
-			// llvm_features is populated by LLVM's idea of what
-			// the host CPU supports.
-			//
-			// As far as I can tell, (which is barely better than
-			// wild guessing), a bitset is formed by parsing the
-			// string left to right.
-			//
-			// So, llvm_features + ',' + additonal_features, will
-			// makes the target_features_set override llvm_features.
-
-			char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1);
-			isize len = 0;
-
-			// tmp = f_string
-			gb_memmove(tmp, f_string.text, f_string.len);
-			len += f_string.len;
-			// tmp += ','
-			tmp[len++] = ',';
-			// tmp += a_string
-			gb_memmove(tmp + len, a_string.text, a_string.len);
-			len += a_string.len;
-			// tmp += NUL
-			tmp[len++] = 0;
-
-			llvm_features = tmp;
-		}
-	}
+	debugf("CPU: %.*s, Features: %s\n", LIT(llvm_cpu), llvm_features);	
 
 	// GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target));
 

+ 17 - 10
src/llvm_backend_proc.cpp

@@ -177,17 +177,24 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i
 		break;
 	}
 
-	if (!entity->Procedure.target_feature_disabled &&
-	    entity->Procedure.target_feature.len != 0) {
-	    	auto features = split_by_comma(entity->Procedure.target_feature);
-		for_array(i, features) {
-			String feature = features[i];
-			LLVMAttributeRef ref = LLVMCreateStringAttribute(
-				m->ctx,
-				cast(char const *)feature.text, cast(unsigned)feature.len,
-				"", 0);
-			LLVMAddAttributeAtIndex(p->value, LLVMAttributeIndex_FunctionIndex, ref);
+	if (pt->Proc.enable_target_feature.len != 0) {
+		gbString feature_str = gb_string_make(temporary_allocator(), "");
+
+		String_Iterator it = {pt->Proc.enable_target_feature, 0};
+		bool first = true;
+		for (;;) {
+			String str = string_split_iterator(&it, ',');
+			if (str == "") break;
+			if (!first) {
+				feature_str = gb_string_appendc(feature_str, ",");
+			}
+			first = false;
+
+			feature_str = gb_string_appendc(feature_str, "+");
+			feature_str = gb_string_append_length(feature_str, str.text, str.len);
 		}
+
+		lb_add_attribute_to_proc_with_string(m, p->value, make_string_c("target-features"), make_string_c(feature_str));
 	}
 
 	if (entity->flags & EntityFlag_Cold) {

+ 2 - 1
src/llvm_backend_utility.cpp

@@ -1708,7 +1708,8 @@ gb_internal lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValu
 	if (is_possible) {
 		switch (build_context.metrics.arch) {
 		case TargetArch_amd64:
-			if (type_size_of(t) == 2) {
+			// NOTE: using the intrinsic when not supported causes slow codegen (See #2928).
+			if (type_size_of(t) == 2 || !check_target_feature_is_enabled(str_lit("fma"), nullptr)) {
 				is_possible = false;
 			}
 			break;

+ 73 - 6
src/main.cpp

@@ -272,6 +272,7 @@ enum BuildFlagKind {
 	BuildFlag_ExtraAssemblerFlags,
 	BuildFlag_Microarch,
 	BuildFlag_TargetFeatures,
+	BuildFlag_StrictTargetFeatures,
 	BuildFlag_MinimumOSVersion,
 	BuildFlag_NoThreadLocal,
 
@@ -467,6 +468,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
 	add_flag(&build_flags, BuildFlag_ExtraAssemblerFlags,     str_lit("extra-assembler-flags"),     BuildFlagParam_String,  Command__does_build);
 	add_flag(&build_flags, BuildFlag_Microarch,               str_lit("microarch"),                 BuildFlagParam_String,  Command__does_build);
 	add_flag(&build_flags, BuildFlag_TargetFeatures,          str_lit("target-features"),           BuildFlagParam_String,  Command__does_build);
+	add_flag(&build_flags, BuildFlag_StrictTargetFeatures,    str_lit("strict-target-features"),    BuildFlagParam_None,    Command__does_build);
 	add_flag(&build_flags, BuildFlag_MinimumOSVersion,        str_lit("minimum-os-version"),        BuildFlagParam_String,  Command__does_build);
 
 	add_flag(&build_flags, BuildFlag_RelocMode,               str_lit("reloc-mode"),                BuildFlagParam_String,  Command__does_build);
@@ -1083,6 +1085,9 @@ gb_internal bool parse_build_flags(Array<String> args) {
 							string_to_lower(&build_context.target_features_string);
 							break;
 						}
+					    case BuildFlag_StrictTargetFeatures:
+						    build_context.strict_target_features = true;
+						    break;
 						case BuildFlag_MinimumOSVersion: {
 							GB_ASSERT(value.kind == ExactValue_String);
 							build_context.minimum_os_version_string = value.value_string;
@@ -1981,7 +1986,20 @@ gb_internal void print_show_help(String const arg0, String const &command) {
 		print_usage_line(2, "Examples:");
 		print_usage_line(3, "-microarch:sandybridge");
 		print_usage_line(3, "-microarch:native");
-		print_usage_line(3, "-microarch:? for a list");
+		print_usage_line(3, "-microarch:\"?\" for a list");
+		print_usage_line(0, "");
+
+		print_usage_line(1, "-target-features:<string>");
+		print_usage_line(2, "Specifies CPU features to enable on top of the enabled features implied by -microarch.");
+		print_usage_line(2, "Examples:");
+		print_usage_line(3, "-target-features:atomics");
+		print_usage_line(3, "-target-features:\"sse2,aes\"");
+		print_usage_line(3, "-target-features:\"?\" for a list");
+		print_usage_line(0, "");
+
+		print_usage_line(1, "-strict-target-features");
+		print_usage_line(2, "Makes @(enable_target_features=\"...\") behave the same way as @(require_target_features=\"...\").");
+		print_usage_line(2, "This enforces that all generated code uses features supported by the combination of -target, -microarch, and -target-features.");
 		print_usage_line(0, "");
 
 		print_usage_line(1, "-reloc-mode:<string>");
@@ -2663,7 +2681,7 @@ int main(int arg_count, char const **arg_ptr) {
 
 	// Check chosen microarchitecture. If not found or ?, print list.
 	bool print_microarch_list = true;
-	if (build_context.microarch.len == 0) {
+	if (build_context.microarch.len == 0 || build_context.microarch == str_lit("native")) {
 		// Autodetect, no need to print list.
 		print_microarch_list = false;
 	} else {
@@ -2680,6 +2698,11 @@ int main(int arg_count, char const **arg_ptr) {
 		}
 	}
 
+	// Set and check build paths...
+	if (!init_build_paths(init_filename)) {
+		return 1;
+	}
+
 	String default_march = get_default_microarchitecture();
 	if (print_microarch_list) {
 		if (build_context.microarch != "?") {
@@ -2703,13 +2726,57 @@ int main(int arg_count, char const **arg_ptr) {
 		return 0;
 	}
 
-	// Set and check build paths...
-	if (!init_build_paths(init_filename)) {
-		return 1;
+	String march = get_final_microarchitecture();
+	String default_features = get_default_features();
+	{
+		String_Iterator it = {default_features, 0};
+		for (;;) {
+			String str = string_split_iterator(&it, ',');
+			if (str == "") break;
+			string_set_add(&build_context.target_features_set, str);
+		}
+	}
+
+	if (build_context.target_features_string.len != 0) {
+		String_Iterator target_it = {build_context.target_features_string, 0};
+		for (;;) {
+			String item = string_split_iterator(&target_it, ',');
+			if (item == "") break;
+
+			String invalid;
+			if (!check_target_feature_is_valid_for_target_arch(item, &invalid) && item != str_lit("help")) {
+				if (item != str_lit("?")) {
+					gb_printf_err("Unkown target feature '%.*s'.\n", LIT(invalid));
+				}
+				gb_printf("Possible -target-features for target %.*s are:\n", LIT(target_arch_names[build_context.metrics.arch]));
+				gb_printf("\n");
+
+				String feature_list = target_features_list[build_context.metrics.arch];
+				String_Iterator it = {feature_list, 0};
+				for (;;) {
+					String str = string_split_iterator(&it, ',');
+					if (str == "") break;
+					if (check_single_target_feature_is_valid(default_features, str)) {
+						if (has_ansi_terminal_colours()) {
+							gb_printf("\t%.*s\x1b[38;5;244m (implied by target microarch %.*s)\x1b[0m\n", LIT(str), LIT(march));
+						} else {
+							gb_printf("\t%.*s (implied by current microarch %.*s)\n", LIT(str), LIT(march));
+						}
+					} else {
+						gb_printf("\t%.*s\n", LIT(str));
+					}
+				}
+
+				return 1;
+			}
+
+			string_set_add(&build_context.target_features_set, item);
+		}
 	}
 
 	if (build_context.show_debug_messages) {
-		debugf("Selected microarch: %.*s\n", LIT(default_march));
+		debugf("Selected microarch: %.*s\n", LIT(march));
+		debugf("Default microarch features: %.*s\n", LIT(default_features));
 		for_array(i, build_context.build_paths) {
 			String build_path = path_to_string(heap_allocator(), build_context.build_paths[i]);
 			debugf("build_paths[%ld]: %.*s\n", i, LIT(build_path));

+ 22 - 0
src/types.cpp

@@ -184,6 +184,8 @@ struct TypeProc {
 	isize    specialization_count;
 	ProcCallingConvention calling_convention;
 	i32      variadic_index;
+	String   require_target_feature;
+	String   enable_target_feature;
 	// TODO(bill): Make this a flag set rather than bools
 	bool     variadic;
 	bool     require_results;
@@ -2991,7 +2993,22 @@ gb_internal Type *union_tag_type(Type *u) {
 	return t_uint;
 }
 
+gb_internal int matched_target_features(TypeProc *t) {
+	if (t->require_target_feature.len == 0) {
+		return 0;
+	}
 
+	int matches = 0;
+	String_Iterator it = {t->require_target_feature, 0};
+	for (;;) {
+		String str = string_split_iterator(&it, ',');
+		if (str == "") break;
+		if (check_target_feature_is_valid_for_target_arch(str, nullptr)) {
+			matches += 1;
+		}
+	}
+	return matches;
+}
 
 enum ProcTypeOverloadKind {
 	ProcOverload_Identical, // The types are identical
@@ -3003,6 +3020,7 @@ enum ProcTypeOverloadKind {
 	ProcOverload_ResultCount,
 	ProcOverload_ResultTypes,
 	ProcOverload_Polymorphic,
+	ProcOverload_TargetFeatures,
 
 	ProcOverload_NotProcedure,
 
@@ -3060,6 +3078,10 @@ gb_internal ProcTypeOverloadKind are_proc_types_overload_safe(Type *x, Type *y)
 		}
 	}
 
+	if (matched_target_features(&px) != matched_target_features(&py)) {
+		return ProcOverload_TargetFeatures;
+	}
+
 	if (px.params != nullptr && py.params != nullptr) {
 		Entity *ex = px.params->Tuple.variables[0];
 		Entity *ey = py.params->Tuple.variables[0];

Some files were not shown because too many files changed in this diff