Browse Source

Merge pull request #3024 from Yawning/fix/simd-x86

core:simd/x86: Various fixes
gingerBill 1 year ago
parent
commit
1e1228fb37

+ 2 - 0
.gitignore

@@ -47,6 +47,8 @@ tests/core/test_linalg_glsl_math
 tests/core/test_noise
 tests/core/test_varint
 tests/core/test_xml
+tests/core/test_core_slice
+tests/core/test_core_thread
 tests/vendor/vendor_botan
 # Visual Studio 2015 cache/options directory
 .vs/

+ 1 - 1
core/simd/x86/adx.odin

@@ -37,7 +37,7 @@ when ODIN_ARCH == .amd64 {
 	}
 }
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name="llvm.x86.addcarry.32")
 	llvm_addcarry_u32  :: proc(a: u8, b: u32, c: u32) -> (u8, u32) ---

+ 1 - 1
core/simd/x86/fxsr.odin

@@ -21,7 +21,7 @@ when ODIN_ARCH == .amd64 {
 	}
 }
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name="llvm.x86.fxsave")
 	fxsave    :: proc(p: rawptr) ---

+ 2 - 2
core/simd/x86/pclmulqdq.odin

@@ -1,12 +1,12 @@
 //+build i386, amd64
 package simd_x86
 
-@(require_results, enable_target_feature="pclmulqdq")
+@(require_results, enable_target_feature="pclmul")
 _mm_clmulepi64_si128 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
 	return pclmulqdq(a, b, u8(IMM8))
 }
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name="llvm.x86.pclmulqdq")
 	pclmulqdq :: proc(a, round_key: __m128i, #const imm8: u8) -> __m128i ---

+ 1 - 1
core/simd/x86/rdtsc.odin

@@ -11,7 +11,7 @@ __rdtscp :: #force_inline proc "c" (aux: ^u32) -> u64 {
 	return rdtscp(aux)
 }
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name="llvm.x86.rdtsc")
 	rdtsc  :: proc() -> u64 ---

+ 1 - 1
core/simd/x86/sha.odin

@@ -30,7 +30,7 @@ _mm_sha256rnds2_epu32 :: #force_inline proc "c" (a, b, k: __m128i) -> __m128i {
 	return transmute(__m128i)sha256rnds2(transmute(i32x4)a, transmute(i32x4)b, transmute(i32x4)k)
 }
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name="llvm.x86.sha1msg1")
 	sha1msg1    :: proc(a, b: i32x4) -> i32x4 ---

+ 1 - 1
core/simd/x86/sse.odin

@@ -532,7 +532,7 @@ when ODIN_ARCH == .amd64 {
 }
 
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name="llvm.x86.sse.add.ss")
 	addss       :: proc(a, b: __m128) -> __m128 ---

+ 1 - 1
core/simd/x86/sse2.odin

@@ -1040,7 +1040,7 @@ when ODIN_ARCH == .amd64 {
 }
 
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name="llvm.x86.sse2.pause")
 	pause      :: proc() ---

+ 1 - 1
core/simd/x86/sse3.odin

@@ -49,7 +49,7 @@ _mm_moveldup_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
 	return simd.shuffle(a, a, 0, 0, 2, 2)
 }
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name = "llvm.x86.sse3.addsub.ps")
 	addsubps :: proc(a, b: __m128) -> __m128 ---

+ 1 - 1
core/simd/x86/sse41.odin

@@ -291,7 +291,7 @@ when ODIN_ARCH == .amd64 {
 }
 
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name = "llvm.x86.sse41.pblendvb")
 	pblendvb   :: proc(a, b: i8x16, mask: i8x16) -> i8x16 ---

+ 1 - 1
core/simd/x86/sse42.odin

@@ -104,7 +104,7 @@ when ODIN_ARCH == .amd64 {
 	}
 }
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	// SSE 4.2 string and text comparison ops
 	@(link_name="llvm.x86.sse42.pcmpestrm128")

+ 1 - 1
core/simd/x86/ssse3.odin

@@ -105,7 +105,7 @@ _mm_sign_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
 
 
 
-@(private, default_calling_convention="c")
+@(private, default_calling_convention="none")
 foreign _ {
 	@(link_name = "llvm.x86.ssse3.pabs.b.128")
 	pabsb128     :: proc(a: i8x16) -> u8x16 ---

+ 3 - 1
src/build_settings.cpp

@@ -1493,7 +1493,7 @@ gb_internal void enable_target_feature(TokenPos pos, String const &target_featur
 }
 
 
-gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes) {
+gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes, bool with_plus) {
 	isize len = 0;
 	isize i = 0;
 	for (String const &feature : build_context.target_features_set) {
@@ -1502,6 +1502,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo
 		}
 		len += feature.len;
 		if (with_quotes) len += 2;
+		if (with_plus) len += 1;
 		i += 1;
 	}
 	char *features = gb_alloc_array(allocator, char, len+1);
@@ -1513,6 +1514,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo
 		}
 
 		if (with_quotes) features[len++] = '"';
+		if (with_plus) features[len++] = '+';
 		gb_memmove(features + len, feature.text, feature.len);
 		len += feature.len;
 		if (with_quotes) features[len++] = '"';

+ 40 - 1
src/llvm_backend.cpp

@@ -2531,7 +2531,46 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
 	*/
 
 	if (build_context.target_features_set.entries.count != 0) {
-		llvm_features = target_features_set_to_cstring(permanent_allocator(), false);
+		// Prefix all of the features with a `+`, because we are
+		// enabling additional features.
+		char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true);
+
+		String f_string = make_string_c(llvm_features);
+		String a_string = make_string_c(additional_features);
+		isize f_len = f_string.len;
+
+		if (f_len == 0) {
+			// The common case is that llvm_features is empty, so
+			// the target_features_set additions can be used as is.
+			llvm_features = additional_features;
+		} else {
+			// The user probably specified `-microarch:native`, so
+			// llvm_features is populated by LLVM's idea of what
+			// the host CPU supports.
+			//
+			// As far as I can tell, (which is barely better than
+			// wild guessing), a bitset is formed by parsing the
+			// string left to right.
+			//
+			// So, llvm_features + ',' + additional_features will
+			// make the target_features_set override llvm_features.
+
+			char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1);
+			isize len = 0;
+
+			// tmp = f_string
+			gb_memmove(tmp, f_string.text, f_string.len);
+			len += f_string.len;
+			// tmp += ','
+			tmp[len++] = ',';
+			// tmp += a_string
+			gb_memmove(tmp + len, a_string.text, a_string.len);
+			len += a_string.len;
+			// tmp += NUL
+			tmp[len++] = 0;
+
+			llvm_features = tmp;
+		}
 	}
 
 	// GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target));