5 years ago · 1dafe00200
--- a/3rdparty/meshoptimizer/src/vertexcodec.cpp
+++ b/3rdparty/meshoptimizer/src/vertexcodec.cpp
@@ -42,10 +42,8 @@
 #endif
 
 // When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
-// Note that we need unimplemented-simd128 subset for a few functions that are implemented de-facto
 #if defined(__wasm_simd128__)
 #define SIMD_WASM
-#define SIMD_TARGET __attribute__((target("unimplemented-simd128")))
 #endif
 
 #ifndef SIMD_TARGET
@@ -100,15 +98,6 @@
 #define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3)
 #endif
 
-#if defined(SIMD_WASM)
-// v128_t wasm_v8x16_swizzle(v128_t a, v128_t b)
-SIMD_TARGET
-static __inline__ v128_t wasm_v8x16_swizzle(v128_t a, v128_t b)
-{
-	return (v128_t)__builtin_wasm_swizzle_v8x16((__i8x16)a, (__i8x16)b);
-}
-#endif
-
 namespace meshopt
 {
 
@@ -769,6 +758,7 @@ static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1
 	uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull;
 	uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull;
 
+	// TODO: This can use v8x16_bitmask in the future
 	uint64_t mask_2 = mask_1a | mask_1b;
 	uint64_t mask_4 = mask_2 | (mask_2 >> 16);
 	uint64_t mask_8 = mask_4 | (mask_4 >> 8);
--- a/3rdparty/meshoptimizer/src/vertexfilter.cpp
+++ b/3rdparty/meshoptimizer/src/vertexfilter.cpp
@@ -735,7 +735,8 @@ static void decodeFilterQuatSimd(short* data, size_t count)
 		v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr);
 
 		// compute component index shifted left by 4 (and moved into i32x4 slot)
-		v128_t cm = wasm_i32x4_shl(cf, 4);
+		// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449
+		volatile v128_t cm = wasm_i32x4_shl(cf, 4);
 
 		// rotate and store
 		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);