Browse Source

Updated spirv-cross.

Бранимир Караџић 3 years ago
parent
commit
b61c3cc15a

+ 36 - 7
3rdparty/spirv-cross/main.cpp

@@ -681,7 +681,8 @@ struct CLIArguments
 	SmallVector<uint32_t> msl_device_argument_buffers;
 	SmallVector<pair<uint32_t, uint32_t>> msl_dynamic_buffers;
 	SmallVector<pair<uint32_t, uint32_t>> msl_inline_uniform_blocks;
-	SmallVector<MSLShaderInput> msl_shader_inputs;
+	SmallVector<MSLShaderInterfaceVariable> msl_shader_inputs;
+	SmallVector<MSLShaderInterfaceVariable> msl_shader_outputs;
 	SmallVector<PLSArg> pls_in;
 	SmallVector<PLSArg> pls_out;
 	SmallVector<Remap> remaps;
@@ -874,6 +875,10 @@ static void print_help_msl()
 	                "\t\t<format> can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, "
 	                "or other-typed variable. <size> is the vector length of the variable, which must be greater than or equal to that declared in the shader.\n"
 	                "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n"
+	                "\t[--msl-shader-output <index> <format> <size>]:\n\t\tSpecify the format of the shader output at <index>.\n"
+	                "\t\t<format> can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, "
+	                "or other-typed variable. <size> is the vector length of the variable, which must be greater than or equal to that declared in the shader.\n"
+	                "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n"
 	                "\t[--msl-multi-patch-workgroup]:\n\t\tUse the new style of tessellation control processing, where multiple patches are processed per workgroup.\n"
 					"\t\tThis should increase throughput by ensuring all the GPU's SIMD lanes are occupied, but it is not compatible with the old style.\n"
 					"\t\tIn addition, this style also passes input variables in buffers directly instead of using vertex attribute processing.\n"
@@ -1082,6 +1087,10 @@ static ExecutionModel stage_to_execution_model(const std::string &stage)
 		return ExecutionModelMissKHR;
 	else if (stage == "rcall")
 		return ExecutionModelCallableKHR;
+	else if (stage == "mesh")
+		return spv::ExecutionModelMeshEXT;
+	else if (stage == "task")
+		return spv::ExecutionModelTaskEXT;
 	else
 		SPIRV_CROSS_THROW("Invalid stage.");
 }
@@ -1178,6 +1187,8 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 			msl_comp->add_inline_uniform_block(v.first, v.second);
 		for (auto &v : args.msl_shader_inputs)
 			msl_comp->add_msl_shader_input(v);
+		for (auto &v : args.msl_shader_outputs)
+			msl_comp->add_msl_shader_output(v);
 		if (args.msl_combined_sampler_suffix)
 			msl_comp->set_combined_sampler_suffix(args.msl_combined_sampler_suffix);
 	}
@@ -1581,23 +1592,41 @@ static int main_inner(int argc, char *argv[])
 	cbs.add("--msl-no-clip-distance-user-varying",
 	        [&args](CLIParser &) { args.msl_enable_clip_distance_user_varying = false; });
 	cbs.add("--msl-shader-input", [&args](CLIParser &parser) {
-		MSLShaderInput input;
+		MSLShaderInterfaceVariable input;
 		// Make sure next_uint() is called in-order.
 		input.location = parser.next_uint();
 		const char *format = parser.next_value_string("other");
 		if (strcmp(format, "any32") == 0)
-			input.format = MSL_SHADER_INPUT_FORMAT_ANY32;
+			input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32;
 		else if (strcmp(format, "any16") == 0)
-			input.format = MSL_SHADER_INPUT_FORMAT_ANY16;
+			input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16;
 		else if (strcmp(format, "u16") == 0)
-			input.format = MSL_SHADER_INPUT_FORMAT_UINT16;
+			input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16;
 		else if (strcmp(format, "u8") == 0)
-			input.format = MSL_SHADER_INPUT_FORMAT_UINT8;
+			input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8;
 		else
-			input.format = MSL_SHADER_INPUT_FORMAT_OTHER;
+			input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER;
 		input.vecsize = parser.next_uint();
 		args.msl_shader_inputs.push_back(input);
 	});
+	cbs.add("--msl-shader-output", [&args](CLIParser &parser) {
+		MSLShaderInterfaceVariable output;
+		// Make sure next_uint() is called in-order.
+		output.location = parser.next_uint();
+		const char *format = parser.next_value_string("other");
+		if (strcmp(format, "any32") == 0)
+			output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32;
+		else if (strcmp(format, "any16") == 0)
+			output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16;
+		else if (strcmp(format, "u16") == 0)
+			output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16;
+		else if (strcmp(format, "u8") == 0)
+			output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8;
+		else
+			output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER;
+		output.vecsize = parser.next_uint();
+		args.msl_shader_outputs.push_back(output);
+	});
 	cbs.add("--msl-multi-patch-workgroup", [&args](CLIParser &) { args.msl_multi_patch_workgroup = true; });
 	cbs.add("--msl-vertex-for-tessellation", [&args](CLIParser &) { args.msl_vertex_for_tessellation = true; });
 	cbs.add("--msl-additional-fixed-sample-mask",

+ 23 - 0
3rdparty/spirv-cross/spirv.h

@@ -98,6 +98,8 @@ typedef enum SpvExecutionModel_ {
     SpvExecutionModelMissNV = 5317,
     SpvExecutionModelCallableKHR = 5318,
     SpvExecutionModelCallableNV = 5318,
+    SpvExecutionModelTaskEXT = 5364,
+    SpvExecutionModelMeshEXT = 5365,
     SpvExecutionModelMax = 0x7fffffff,
 } SpvExecutionModel;
 
@@ -165,11 +167,21 @@ typedef enum SpvExecutionMode_ {
     SpvExecutionModeSignedZeroInfNanPreserve = 4461,
     SpvExecutionModeRoundingModeRTE = 4462,
     SpvExecutionModeRoundingModeRTZ = 4463,
+    SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
     SpvExecutionModeStencilRefReplacingEXT = 5027,
+    SpvExecutionModeStencilRefUnchangedFrontAMD = 5079,
+    SpvExecutionModeStencilRefGreaterFrontAMD = 5080,
+    SpvExecutionModeStencilRefLessFrontAMD = 5081,
+    SpvExecutionModeStencilRefUnchangedBackAMD = 5082,
+    SpvExecutionModeStencilRefGreaterBackAMD = 5083,
+    SpvExecutionModeStencilRefLessBackAMD = 5084,
+    SpvExecutionModeOutputLinesEXT = 5269,
     SpvExecutionModeOutputLinesNV = 5269,
+    SpvExecutionModeOutputPrimitivesEXT = 5270,
     SpvExecutionModeOutputPrimitivesNV = 5270,
     SpvExecutionModeDerivativeGroupQuadsNV = 5289,
     SpvExecutionModeDerivativeGroupLinearNV = 5290,
+    SpvExecutionModeOutputTrianglesEXT = 5298,
     SpvExecutionModeOutputTrianglesNV = 5298,
     SpvExecutionModePixelInterlockOrderedEXT = 5366,
     SpvExecutionModePixelInterlockUnorderedEXT = 5367,
@@ -219,6 +231,7 @@ typedef enum SpvStorageClass_ {
     SpvStorageClassShaderRecordBufferNV = 5343,
     SpvStorageClassPhysicalStorageBuffer = 5349,
     SpvStorageClassPhysicalStorageBufferEXT = 5349,
+    SpvStorageClassTaskPayloadWorkgroupEXT = 5402,
     SpvStorageClassCodeSectionINTEL = 5605,
     SpvStorageClassDeviceOnlyINTEL = 5936,
     SpvStorageClassHostOnlyINTEL = 5937,
@@ -501,6 +514,7 @@ typedef enum SpvDecoration_ {
     SpvDecorationPassthroughNV = 5250,
     SpvDecorationViewportRelativeNV = 5252,
     SpvDecorationSecondaryViewportRelativeNV = 5256,
+    SpvDecorationPerPrimitiveEXT = 5271,
     SpvDecorationPerPrimitiveNV = 5271,
     SpvDecorationPerViewNV = 5272,
     SpvDecorationPerTaskNV = 5273,
@@ -650,6 +664,10 @@ typedef enum SpvBuiltIn_ {
     SpvBuiltInFragmentSizeNV = 5292,
     SpvBuiltInFragInvocationCountEXT = 5293,
     SpvBuiltInInvocationsPerPixelNV = 5293,
+    SpvBuiltInPrimitivePointIndicesEXT = 5294,
+    SpvBuiltInPrimitiveLineIndicesEXT = 5295,
+    SpvBuiltInPrimitiveTriangleIndicesEXT = 5296,
+    SpvBuiltInCullPrimitiveEXT = 5299,
     SpvBuiltInLaunchIdKHR = 5319,
     SpvBuiltInLaunchIdNV = 5319,
     SpvBuiltInLaunchSizeKHR = 5320,
@@ -990,6 +1008,7 @@ typedef enum SpvCapability_ {
     SpvCapabilityFragmentFullyCoveredEXT = 5265,
     SpvCapabilityMeshShadingNV = 5266,
     SpvCapabilityImageFootprintNV = 5282,
+    SpvCapabilityMeshShadingEXT = 5283,
     SpvCapabilityFragmentBarycentricKHR = 5284,
     SpvCapabilityFragmentBarycentricNV = 5284,
     SpvCapabilityComputeDerivativeGroupQuadsNV = 5288,
@@ -1589,6 +1608,8 @@ typedef enum SpvOp_ {
     SpvOpFragmentFetchAMD = 5012,
     SpvOpReadClockKHR = 5056,
     SpvOpImageSampleFootprintNV = 5283,
+    SpvOpEmitMeshTasksEXT = 5294,
+    SpvOpSetMeshOutputsEXT = 5295,
     SpvOpGroupNonUniformPartitionNV = 5296,
     SpvOpWritePackedPrimitiveIndices4x8NV = 5299,
     SpvOpReportIntersectionKHR = 5334,
@@ -2262,6 +2283,8 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy
     case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
     case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break;
     case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+    case SpvOpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break;
     case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
     case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
     case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;

+ 23 - 0
3rdparty/spirv-cross/spirv.hpp

@@ -94,6 +94,8 @@ enum ExecutionModel {
     ExecutionModelMissNV = 5317,
     ExecutionModelCallableKHR = 5318,
     ExecutionModelCallableNV = 5318,
+    ExecutionModelTaskEXT = 5364,
+    ExecutionModelMeshEXT = 5365,
     ExecutionModelMax = 0x7fffffff,
 };
 
@@ -161,11 +163,21 @@ enum ExecutionMode {
     ExecutionModeSignedZeroInfNanPreserve = 4461,
     ExecutionModeRoundingModeRTE = 4462,
     ExecutionModeRoundingModeRTZ = 4463,
+    ExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
     ExecutionModeStencilRefReplacingEXT = 5027,
+    ExecutionModeStencilRefUnchangedFrontAMD = 5079,
+    ExecutionModeStencilRefGreaterFrontAMD = 5080,
+    ExecutionModeStencilRefLessFrontAMD = 5081,
+    ExecutionModeStencilRefUnchangedBackAMD = 5082,
+    ExecutionModeStencilRefGreaterBackAMD = 5083,
+    ExecutionModeStencilRefLessBackAMD = 5084,
+    ExecutionModeOutputLinesEXT = 5269,
     ExecutionModeOutputLinesNV = 5269,
+    ExecutionModeOutputPrimitivesEXT = 5270,
     ExecutionModeOutputPrimitivesNV = 5270,
     ExecutionModeDerivativeGroupQuadsNV = 5289,
     ExecutionModeDerivativeGroupLinearNV = 5290,
+    ExecutionModeOutputTrianglesEXT = 5298,
     ExecutionModeOutputTrianglesNV = 5298,
     ExecutionModePixelInterlockOrderedEXT = 5366,
     ExecutionModePixelInterlockUnorderedEXT = 5367,
@@ -215,6 +227,7 @@ enum StorageClass {
     StorageClassShaderRecordBufferNV = 5343,
     StorageClassPhysicalStorageBuffer = 5349,
     StorageClassPhysicalStorageBufferEXT = 5349,
+    StorageClassTaskPayloadWorkgroupEXT = 5402,
     StorageClassCodeSectionINTEL = 5605,
     StorageClassDeviceOnlyINTEL = 5936,
     StorageClassHostOnlyINTEL = 5937,
@@ -497,6 +510,7 @@ enum Decoration {
     DecorationPassthroughNV = 5250,
     DecorationViewportRelativeNV = 5252,
     DecorationSecondaryViewportRelativeNV = 5256,
+    DecorationPerPrimitiveEXT = 5271,
     DecorationPerPrimitiveNV = 5271,
     DecorationPerViewNV = 5272,
     DecorationPerTaskNV = 5273,
@@ -646,6 +660,10 @@ enum BuiltIn {
     BuiltInFragmentSizeNV = 5292,
     BuiltInFragInvocationCountEXT = 5293,
     BuiltInInvocationsPerPixelNV = 5293,
+    BuiltInPrimitivePointIndicesEXT = 5294,
+    BuiltInPrimitiveLineIndicesEXT = 5295,
+    BuiltInPrimitiveTriangleIndicesEXT = 5296,
+    BuiltInCullPrimitiveEXT = 5299,
     BuiltInLaunchIdKHR = 5319,
     BuiltInLaunchIdNV = 5319,
     BuiltInLaunchSizeKHR = 5320,
@@ -986,6 +1004,7 @@ enum Capability {
     CapabilityFragmentFullyCoveredEXT = 5265,
     CapabilityMeshShadingNV = 5266,
     CapabilityImageFootprintNV = 5282,
+    CapabilityMeshShadingEXT = 5283,
     CapabilityFragmentBarycentricKHR = 5284,
     CapabilityFragmentBarycentricNV = 5284,
     CapabilityComputeDerivativeGroupQuadsNV = 5288,
@@ -1585,6 +1604,8 @@ enum Op {
     OpFragmentFetchAMD = 5012,
     OpReadClockKHR = 5056,
     OpImageSampleFootprintNV = 5283,
+    OpEmitMeshTasksEXT = 5294,
+    OpSetMeshOutputsEXT = 5295,
     OpGroupNonUniformPartitionNV = 5296,
     OpWritePackedPrimitiveIndices4x8NV = 5299,
     OpReportIntersectionKHR = 5334,
@@ -2258,6 +2279,8 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
     case OpReadClockKHR: *hasResult = true; *hasResultType = true; break;
     case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+    case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break;
+    case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break;
     case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
     case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
     case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;

+ 16 - 1
3rdparty/spirv-cross/spirv_common.hpp

@@ -682,6 +682,7 @@ struct SPIREntryPoint
 	} workgroup_size;
 	uint32_t invocations = 0;
 	uint32_t output_vertices = 0;
+	uint32_t output_primitives = 0;
 	spv::ExecutionModel model = spv::ExecutionModelMax;
 	bool geometry_passthrough = false;
 };
@@ -776,7 +777,8 @@ struct SPIRBlock : IVariant
 		Unreachable, // Noop
 		Kill, // Discard
 		IgnoreIntersection, // Ray Tracing
-		TerminateRay // Ray Tracing
+		TerminateRay, // Ray Tracing
+		EmitMeshTasks // Mesh shaders
 	};
 
 	enum Merge
@@ -838,6 +840,13 @@ struct SPIRBlock : IVariant
 	BlockID false_block = 0;
 	BlockID default_block = 0;
 
+	// If terminator is EmitMeshTasksEXT.
+	struct
+	{
+		ID groups[3];
+		ID payload;
+	} mesh = {};
+
 	SmallVector<Instruction> ops;
 
 	struct Phi
@@ -1636,6 +1645,12 @@ enum ExtendedDecorations
 	// results of interpolation can.
 	SPIRVCrossDecorationInterpolantComponentExpr,
 
+	// Apply to any struct type that is used in the Workgroup storage class.
+	// This causes matrices in MSL prior to Metal 3.0 to be emitted using a special
+	// class that is convertible to the standard matrix type, to work around the
+	// lack of constructors in the 'threadgroup' address space.
+	SPIRVCrossDecorationWorkgroupStruct,
+
 	SPIRVCrossDecorationCount
 };
 

+ 35 - 1
3rdparty/spirv-cross/spirv_cross.cpp

@@ -98,7 +98,8 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
 	// This is a global side effect of the function.
 	if (block.terminator == SPIRBlock::Kill ||
 	    block.terminator == SPIRBlock::TerminateRay ||
-	    block.terminator == SPIRBlock::IgnoreIntersection)
+	    block.terminator == SPIRBlock::IgnoreIntersection ||
+	    block.terminator == SPIRBlock::EmitMeshTasks)
 		return false;
 
 	for (auto &i : block.ops)
@@ -154,6 +155,11 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
 		case OpEmitVertex:
 			return false;
 
+		// Mesh shader functions modify global state.
+		// (EmitMeshTasks is a terminator).
+		case OpSetMeshOutputsEXT:
+			return false;
+
 		// Barriers disallow any reordering, so we should treat blocks with barrier as writing.
 		case OpControlBarrier:
 		case OpMemoryBarrier:
@@ -1069,8 +1075,11 @@ void Compiler::parse_fixup()
 		{
 			auto &var = id.get<SPIRVariable>();
 			if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
+			    var.storage == StorageClassTaskPayloadWorkgroupEXT ||
 			    var.storage == StorageClassOutput)
+			{
 				global_variables.push_back(var.self);
+			}
 			if (variable_storage_is_aliased(var))
 				aliased_variables.push_back(var.self);
 		}
@@ -2177,6 +2186,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
 		execution.output_vertices = arg0;
 		break;
 
+	case ExecutionModeOutputPrimitivesEXT:
+		execution.output_primitives = arg0;
+		break;
+
 	default:
 		break;
 	}
@@ -2297,6 +2310,9 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t
 	case ExecutionModeOutputVertices:
 		return execution.output_vertices;
 
+	case ExecutionModeOutputPrimitivesEXT:
+		return execution.output_primitives;
+
 	default:
 		return 0;
 	}
@@ -2359,6 +2375,19 @@ void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
 		e.implied_read_expressions.push_back(source);
 }
 
+void Compiler::add_active_interface_variable(uint32_t var_id)
+{
+	active_interface_variables.insert(var_id);
+
+	// In SPIR-V 1.4 and up we must also track the interface variable in the entry point.
+	if (ir.get_spirv_version() >= 0x10400)
+	{
+		auto &vars = get_entry_point().interface_variables;
+		if (find(begin(vars), end(vars), VariableID(var_id)) == end(vars))
+			vars.push_back(var_id);
+	}
+}
+
 void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
 {
 	// Don't inherit any expression dependencies if the expression in dst
@@ -4410,6 +4439,7 @@ void Compiler::analyze_image_and_sampler_usage()
 
 	comparison_ids = std::move(handler.comparison_ids);
 	need_subpass_input = handler.need_subpass_input;
+	need_subpass_input_ms = handler.need_subpass_input_ms;
 
 	// Forward information from separate images and samplers into combined image samplers.
 	for (auto &combined : combined_image_samplers)
@@ -4576,7 +4606,11 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
 		// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
 		auto &type = compiler.get<SPIRType>(args[0]);
 		if (type.image.dim == DimSubpassData)
+		{
 			need_subpass_input = true;
+			if (type.image.ms)
+				need_subpass_input_ms = true;
+		}
 
 		// If we load a SampledImage and it will be used with Dref, propagate the state up.
 		if (dref_combined_samplers.count(args[1]) != 0)

+ 3 - 0
3rdparty/spirv-cross/spirv_cross.hpp

@@ -755,6 +755,7 @@ protected:
 	void inherit_expression_dependencies(uint32_t dst, uint32_t source);
 	void add_implied_read_expression(SPIRExpression &e, uint32_t source);
 	void add_implied_read_expression(SPIRAccessChain &e, uint32_t source);
+	void add_active_interface_variable(uint32_t var_id);
 
 	// For proper multiple entry point support, allow querying if an Input or Output
 	// variable is part of that entry points interface.
@@ -930,6 +931,7 @@ protected:
 	// Similar is implemented for images, as well as if subpass inputs are needed.
 	std::unordered_set<uint32_t> comparison_ids;
 	bool need_subpass_input = false;
+	bool need_subpass_input_ms = false;
 
 	// In certain backends, we will need to use a dummy sampler to be able to emit code.
 	// GLSL does not support texelFetch on texture2D objects, but SPIR-V does,
@@ -969,6 +971,7 @@ protected:
 
 		void add_hierarchy_to_comparison_ids(uint32_t ids);
 		bool need_subpass_input = false;
+		bool need_subpass_input_ms = false;
 		void add_dependency(uint32_t dst, uint32_t src);
 	};
 

+ 60 - 13
3rdparty/spirv-cross/spirv_cross_c.cpp

@@ -1136,9 +1136,9 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const
 	}
 
 	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
-	MSLShaderInput attr;
+	MSLShaderInterfaceVariable attr;
 	attr.location = va->location;
-	attr.format = static_cast<MSLShaderInputFormat>(va->format);
+	attr.format = static_cast<MSLShaderVariableFormat>(va->format);
 	attr.builtin = static_cast<spv::BuiltIn>(va->builtin);
 	msl.add_msl_shader_input(attr);
 	return SPVC_SUCCESS;
@@ -1149,7 +1149,7 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const
 #endif
 }
 
-spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spvc_msl_shader_input *si)
+spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spvc_msl_shader_interface_var *si)
 {
 #if SPIRV_CROSS_C_API_MSL
 	if (compiler->backend != SPVC_BACKEND_MSL)
@@ -1159,9 +1159,9 @@ spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spv
 	}
 
 	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
-	MSLShaderInput input;
+	MSLShaderInterfaceVariable input;
 	input.location = si->location;
-	input.format = static_cast<MSLShaderInputFormat>(si->format);
+	input.format = static_cast<MSLShaderVariableFormat>(si->format);
 	input.builtin = static_cast<spv::BuiltIn>(si->builtin);
 	input.vecsize = si->vecsize;
 	msl.add_msl_shader_input(input);
@@ -1173,6 +1173,30 @@ spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spv
 #endif
 }
 
+spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, const spvc_msl_shader_interface_var *so)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	MSLShaderInterfaceVariable output;
+	output.location = so->location;
+	output.format = static_cast<MSLShaderVariableFormat>(so->format);
+	output.builtin = static_cast<spv::BuiltIn>(so->builtin);
+	output.vecsize = so->vecsize;
+	msl.add_msl_shader_output(output);
+	return SPVC_SUCCESS;
+#else
+	(void)so;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
 spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler,
                                                    const spvc_msl_resource_binding *binding)
 {
@@ -1298,6 +1322,24 @@ spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigne
 #endif
 }
 
+spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_FALSE;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	return msl.is_msl_shader_output_used(location) ? SPVC_TRUE : SPVC_FALSE;
+#else
+	(void)location;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_FALSE;
+#endif
+}
+
 spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location)
 {
 	return spvc_compiler_msl_is_shader_input_used(compiler, location);
@@ -2511,7 +2553,7 @@ void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr)
 {
 #if SPIRV_CROSS_C_API_MSL
 	// Crude, but works.
-	MSLShaderInput attr_default;
+	MSLShaderInterfaceVariable attr_default;
 	attr->location = attr_default.location;
 	attr->format = static_cast<spvc_msl_vertex_format>(attr_default.format);
 	attr->builtin = static_cast<SpvBuiltIn>(attr_default.builtin);
@@ -2520,19 +2562,24 @@ void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr)
 #endif
 }
 
-void spvc_msl_shader_input_init(spvc_msl_shader_input *input)
+void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var)
 {
 #if SPIRV_CROSS_C_API_MSL
-	MSLShaderInput input_default;
-	input->location = input_default.location;
-	input->format = static_cast<spvc_msl_shader_input_format>(input_default.format);
-	input->builtin = static_cast<SpvBuiltIn>(input_default.builtin);
-	input->vecsize = input_default.vecsize;
+	MSLShaderInterfaceVariable var_default;
+	var->location = var_default.location;
+	var->format = static_cast<spvc_msl_shader_variable_format>(var_default.format);
+	var->builtin = static_cast<SpvBuiltIn>(var_default.builtin);
+	var->vecsize = var_default.vecsize;
 #else
-	memset(input, 0, sizeof(*input));
+	memset(var, 0, sizeof(*var));
 #endif
 }
 
+void spvc_msl_shader_input_init(spvc_msl_shader_input *input)
+{
+	spvc_msl_shader_interface_var_init(input);
+}
+
 void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding)
 {
 #if SPIRV_CROSS_C_API_MSL

+ 27 - 14
3rdparty/spirv-cross/spirv_cross_c.h

@@ -290,23 +290,29 @@ typedef enum spvc_msl_index_type
 } spvc_msl_index_type;
 
 /* Maps to C++ API. */
-typedef enum spvc_msl_shader_input_format
+typedef enum spvc_msl_shader_variable_format
 {
-	SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = 0,
-	SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = 1,
-	SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = 2,
-	SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = 3,
-	SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = 4,
+	SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER = 0,
+	SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1,
+	SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2,
+	SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3,
+	SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4,
 
 	/* Deprecated names. */
-	SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_INPUT_FORMAT_OTHER,
-	SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_INPUT_FORMAT_UINT8,
-	SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_INPUT_FORMAT_UINT16,
+	SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER,
+	SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8,
+	SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16,
+	SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER,
+	SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8,
+	SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16,
+	SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16,
+	SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32,
+
 
 	SPVC_MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff
-} spvc_msl_shader_input_format, spvc_msl_vertex_format;
+} spvc_msl_shader_variable_format, spvc_msl_shader_input_format, spvc_msl_vertex_format;
 
-/* Maps to C++ API. Deprecated; use spvc_msl_shader_input. */
+/* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var. */
 typedef struct spvc_msl_vertex_attribute
 {
 	unsigned location;
@@ -330,17 +336,21 @@ typedef struct spvc_msl_vertex_attribute
 SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr);
 
 /* Maps to C++ API. */
-typedef struct spvc_msl_shader_input
+typedef struct spvc_msl_shader_interface_var
 {
 	unsigned location;
 	spvc_msl_vertex_format format;
 	SpvBuiltIn builtin;
 	unsigned vecsize;
-} spvc_msl_shader_input;
+} spvc_msl_shader_interface_var, spvc_msl_shader_input;
 
 /*
  * Initializes the shader input struct.
  */
+SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var);
+/*
+ * Deprecated. Use spvc_msl_shader_interface_var_init().
+ */
 SPVC_PUBLIC_API void spvc_msl_shader_input_init(spvc_msl_shader_input *input);
 
 /* Maps to C++ API. */
@@ -786,13 +796,16 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler,
                                                                    const spvc_msl_resource_binding *binding);
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler,
-                                                               const spvc_msl_shader_input *input);
+                                                               const spvc_msl_shader_interface_var *input);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler,
+                                                                const spvc_msl_shader_interface_var *output);
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set);
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address);
 
 /* Obsolete, use is_shader_input_used. */
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location);
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location);
 
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler,
                                                              SpvExecutionModel model,

+ 4 - 0
3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp

@@ -330,6 +330,10 @@ void ParsedIR::fixup_reserved_names()
 {
 	for (uint32_t id : meta_needing_name_fixup)
 	{
+		// Don't rename remapped variables like 'gl_LastFragDepthARM'.
+		if (ids[id].get_type() == TypeVariable && get<SPIRVariable>(id).remapped_variable)
+			continue;
+
 		auto &m = meta[id];
 		sanitize_identifier(m.decoration.alias, false, false);
 		for (auto &memb : m.members)

+ 167 - 31
3rdparty/spirv-cross/spirv_glsl.cpp

@@ -497,6 +497,15 @@ void CompilerGLSL::find_static_extensions()
 			require_extension_internal("GL_NV_ray_tracing");
 		break;
 
+	case ExecutionModelMeshEXT:
+	case ExecutionModelTaskEXT:
+		if (options.es || options.version < 450)
+			SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
+		require_extension_internal("GL_EXT_mesh_shader");
+		break;
+
 	default:
 		break;
 	}
@@ -649,7 +658,7 @@ string CompilerGLSL::compile()
 	{
 		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
 		backend.nonuniform_qualifier = "";
-		backend.needs_row_major_load_workaround = true;
+		backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround;
 	}
 	backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
 	backend.force_gl_in_out_block = true;
@@ -1060,6 +1069,8 @@ void CompilerGLSL::emit_header()
 		break;
 
 	case ExecutionModelGLCompute:
+	case ExecutionModelTaskEXT:
+	case ExecutionModelMeshEXT:
 	{
 		if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
 		{
@@ -1078,6 +1089,18 @@ void CompilerGLSL::emit_header()
 			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
 			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
 		}
+
+		if (execution.model == ExecutionModelMeshEXT)
+		{
+			outputs.push_back(join("max_vertices = ", execution.output_vertices));
+			outputs.push_back(join("max_primitives = ", execution.output_primitives));
+			if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
+				outputs.push_back("triangles");
+			else if (execution.flags.get(ExecutionModeOutputLinesEXT))
+				outputs.push_back("lines");
+			else if (execution.flags.get(ExecutionModeOutputPoints))
+				outputs.push_back("points");
+		}
 		break;
 	}
 
@@ -1235,6 +1258,8 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
 		res += "sample ";
 	if (flags.get(DecorationInvariant))
 		res += "invariant ";
+	if (flags.get(DecorationPerPrimitiveEXT))
+	    res += "perprimitiveEXT ";
 
 	if (flags.get(DecorationExplicitInterpAMD))
 	{
@@ -2624,7 +2649,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
 			}
 
 			// Workaround to make sure we can emit "patch in/out" correctly.
-			fixup_io_block_patch_qualifiers(var);
+			fixup_io_block_patch_primitive_qualifiers(var);
 
 			// Block names should never alias.
 			auto block_name = to_name(type.self, false);
@@ -2647,8 +2672,15 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
 			// Instance names cannot alias block names.
 			resource_names.insert(block_name);
 
-			bool is_patch = has_decoration(var.self, DecorationPatch);
-			statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
+			const char *block_qualifier;
+			if (has_decoration(var.self, DecorationPatch))
+				block_qualifier = "patch ";
+			else if (has_decoration(var.self, DecorationPerPrimitiveEXT))
+				block_qualifier = "perprimitiveEXT ";
+			else
+				block_qualifier = "";
+
+			statement(layout_for_variable(var), block_qualifier, qual, block_name);
 			begin_scope();
 
 			type.member_name_cache.clear();
@@ -3084,7 +3116,8 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
 	});
 
 	// If we're declaring clip/cull planes with control points we need to force block declaration.
-	if (get_execution_model() == ExecutionModelTessellationControl &&
+	if ((get_execution_model() == ExecutionModelTessellationControl ||
+	     get_execution_model() == ExecutionModelMeshEXT) &&
 	    (clip_distance_count || cull_distance_count))
 	{
 		should_force = true;
@@ -3093,7 +3126,7 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
 	return should_force;
 }
 
-void CompilerGLSL::fixup_implicit_builtin_block_names()
+void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
 {
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
 		auto &type = this->get<SPIRType>(var.basetype);
@@ -3101,11 +3134,22 @@ void CompilerGLSL::fixup_implicit_builtin_block_names()
 		if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
 		    is_builtin_variable(var))
 		{
-			// Make sure the array has a supported name in the code.
-			if (var.storage == StorageClassOutput)
-				set_name(var.self, "gl_out");
-			else if (var.storage == StorageClassInput)
-				set_name(var.self, "gl_in");
+			if (model != ExecutionModelMeshEXT)
+			{
+				// Make sure the array has a supported name in the code.
+				if (var.storage == StorageClassOutput)
+					set_name(var.self, "gl_out");
+				else if (var.storage == StorageClassInput)
+					set_name(var.self, "gl_in");
+			}
+			else
+			{
+				auto flags = get_buffer_block_flags(var.self);
+				if (flags.get(DecorationPerPrimitiveEXT))
+					set_name(var.self, "gl_MeshPrimitivesEXT");
+				else
+					set_name(var.self, "gl_MeshVerticesEXT");
+			}
 		}
 	});
 }
@@ -3129,6 +3173,11 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 	uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
 	std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
 
+	const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
+		return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
+			builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
+	};
+
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
 		auto &type = this->get<SPIRType>(var.basetype);
 		bool block = has_decoration(type.self, DecorationBlock);
@@ -3139,7 +3188,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 			uint32_t index = 0;
 			for (auto &m : ir.meta[type.self].members)
 			{
-				if (m.builtin)
+				if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
 				{
 					builtins.set(m.builtin_type);
 					if (m.builtin_type == BuiltInCullDistance)
@@ -3192,7 +3241,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 		{
 			// While we're at it, collect all declared global builtins (HLSL mostly ...).
 			auto &m = ir.meta[var.self].decoration;
-			if (m.builtin)
+			if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
 			{
 				global_builtins.set(m.builtin_type);
 				if (m.builtin_type == BuiltInCullDistance)
@@ -3281,7 +3330,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 			attr.push_back(join("stream = ", geom_stream));
 		}
 
-		if (!attr.empty())
+		if (model == ExecutionModelMeshEXT)
+			statement("out gl_MeshPerVertexEXT");
+		else if (!attr.empty())
 			statement("layout(", merge(attr), ") out gl_PerVertex");
 		else
 			statement("out gl_PerVertex");
@@ -3399,7 +3450,8 @@ void CompilerGLSL::emit_resources()
 	case ExecutionModelGeometry:
 	case ExecutionModelTessellationControl:
 	case ExecutionModelTessellationEvaluation:
-		fixup_implicit_builtin_block_names();
+	case ExecutionModelMeshEXT:
+		fixup_implicit_builtin_block_names(execution.model);
 		break;
 
 	default:
@@ -3419,6 +3471,7 @@ void CompilerGLSL::emit_resources()
 			break;
 
 		case ExecutionModelVertex:
+		case ExecutionModelMeshEXT:
 			emit_declared_builtin_block(StorageClassOutput, execution.model);
 			break;
 
@@ -8890,6 +8943,15 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 			SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
 		return "gl_FragFullyCoveredNV";
 
+	case BuiltInPrimitiveTriangleIndicesEXT:
+		return "gl_PrimitiveTriangleIndicesEXT";
+	case BuiltInPrimitiveLineIndicesEXT:
+		return "gl_PrimitiveLineIndicesEXT";
+	case BuiltInPrimitivePointIndicesEXT:
+		return "gl_PrimitivePointIndicesEXT";
+	case BuiltInCullPrimitiveEXT:
+		return "gl_CullPrimitiveEXT";
+
 	default:
 		return join("gl_BuiltIn_", convert_to_string(builtin));
 	}
@@ -8913,20 +8975,31 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index)
 }
 
 void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
-                                                      AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
+                                                      AccessChainFlags flags, bool &access_chain_is_arrayed,
                                                       uint32_t index)
 {
 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
+	bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
 
-	expr += "[";
+	string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read);
 
-	if (index_is_literal)
-		expr += convert_to_string(index);
+	// For the case where the base of an OpPtrAccessChain already ends in [n],
+	// we need to use the index as an offset to the existing index, otherwise,
+	// we can just use the index directly.
+	if (ptr_chain && access_chain_is_arrayed)
+	{
+		size_t split_pos = expr.find_last_of(']');
+		string expr_front = expr.substr(0, split_pos);
+		string expr_back = expr.substr(split_pos);
+		expr = expr_front + " + " +  enclose_expression(idx_expr) + expr_back;
+	}
 	else
-		expr += to_unpacked_expression(index, register_expression_read);
-
-	expr += "]";
+	{
+		expr += "[";
+		expr += idx_expr;
+		expr += "]";
+	}
 }
 
 bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
@@ -8987,10 +9060,12 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;
 
-	const auto append_index = [&](uint32_t index, bool is_literal) {
+	const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) {
 		AccessChainFlags mod_flags = flags;
 		if (!is_literal)
 			mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
+		if (!is_ptr_chain)
+			mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
 		access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
 		check_physical_type_cast(expr, type, physical_type);
 	};
@@ -9043,7 +9118,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			}
 			else
 			{
-				append_index(index, is_literal);
+				append_index(index, is_literal, true);
 			}
 
 			if (type->basetype == SPIRType::ControlPointArray)
@@ -9078,14 +9153,19 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				// but HLSL seems to just emit straight arrays here.
 				// We must pretend this access goes through gl_in/gl_out arrays
 				// to be able to access certain builtins as arrays.
+				// Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT.
 				auto builtin = ir.meta[base].decoration.builtin_type;
+				bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
+
 				switch (builtin)
 				{
 				// case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
 				// case BuiltInClipDistance:
 				case BuiltInPosition:
 				case BuiltInPointSize:
-					if (var->storage == StorageClassInput)
+					if (mesh_shader)
+						expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
+					else if (var->storage == StorageClassInput)
 						expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
 					else if (var->storage == StorageClassOutput)
 						expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
@@ -9093,6 +9173,17 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 						append_index(index, is_literal);
 					break;
 
+				case BuiltInPrimitiveId:
+				case BuiltInLayer:
+				case BuiltInViewportIndex:
+				case BuiltInCullPrimitiveEXT:
+				case BuiltInPrimitiveShadingRateKHR:
+					if (mesh_shader)
+						expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
+					else
+						append_index(index, is_literal);
+					break;
+
 				default:
 					append_index(index, is_literal);
 					break;
@@ -10741,9 +10832,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (expr_type.vecsize > type.vecsize)
 			expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
 
+		if (forward && ptr_expression)
+			ptr_expression->need_transpose = old_need_transpose;
+
 		// We might need to cast in order to load from a builtin.
 		cast_from_variable_load(ptr, expr, type);
 
+		if (forward && ptr_expression)
+			ptr_expression->need_transpose = false;
+
 		// We might be trying to load a gl_Position[N], where we should be
 		// doing float4[](gl_in[i].gl_Position, ...) instead.
 		// Similar workarounds are required for input arrays in tessellation.
@@ -11261,8 +11358,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// forcing temporaries is not going to help.
 		// This is similar for Constant and Undef inputs.
 		// The only safe thing to RMW is SPIRExpression.
+		// If the expression has already been used (i.e. used in a continue block), we have to keep using
+		// that loop variable, since we won't be able to override the expression after the fact.
+		// If the composite is hoisted, we might never be able to properly invalidate any usage
+		// of that composite in a subsequent loop iteration.
 		if (invalid_expressions.count(composite) ||
 		    block_composite_insert_overwrite.count(composite) ||
+		    hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) ||
 		    maybe_get<SPIRExpression>(composite) == nullptr)
 		{
 			can_modify_in_place = false;
@@ -12495,12 +12597,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 		uint32_t img = ops[2];
+		auto &type = expression_type(img);
+		auto &imgtype = get<SPIRType>(type.self);
 
 		std::string fname = "textureSize";
 		if (is_legacy_desktop())
 		{
-			auto &type = expression_type(img);
-			auto &imgtype = get<SPIRType>(type.self);
 			fname = legacy_tex_op(fname, imgtype, img);
 		}
 		else if (is_legacy_es())
@@ -12508,6 +12610,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 		auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
 		                 bitcast_expression(SPIRType::Int, ops[3]), ")");
+
+		// ES needs to emulate 1D images as 2D.
+		if (type.image.dim == Dim1D && options.es)
+			expr = join(expr, ".x");
+
 		auto &restype = get<SPIRType>(ops[0]);
 		expr = bitcast_expression(restype, SPIRType::Int, expr);
 		emit_op(result_type, id, expr, true);
@@ -13499,6 +13606,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		}
 		break;
 
+	case OpSetMeshOutputsEXT:
+		statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
+		break;
+
 	default:
 		statement("// unimplemented op ", instruction.op);
 		break;
@@ -13807,28 +13918,41 @@ string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
 	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
 }
 
-void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
+void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var)
 {
 	// Works around weird behavior in glslangValidator where
 	// a patch out block is translated to just block members getting the decoration.
 	// To make glslang not complain when we compile again, we have to transform this back to a case where
 	// the variable itself has Patch decoration, and not members.
+	// Same for perprimitiveEXT.
 	auto &type = get<SPIRType>(var.basetype);
 	if (has_decoration(type.self, DecorationBlock))
 	{
 		uint32_t member_count = uint32_t(type.member_types.size());
+		Decoration promoted_decoration = {};
+		bool do_promote_decoration = false;
 		for (uint32_t i = 0; i < member_count; i++)
 		{
 			if (has_member_decoration(type.self, i, DecorationPatch))
 			{
-				set_decoration(var.self, DecorationPatch);
+				promoted_decoration = DecorationPatch;
+				do_promote_decoration = true;
+				break;
+			}
+			else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
+			{
+				promoted_decoration = DecorationPerPrimitiveEXT;
+				do_promote_decoration = true;
 				break;
 			}
 		}
 
-		if (has_decoration(var.self, DecorationPatch))
+		if (do_promote_decoration)
+		{
+			set_decoration(var.self, promoted_decoration);
 			for (uint32_t i = 0; i < member_count; i++)
-				unset_member_decoration(type.self, i, DecorationPatch);
+				unset_member_decoration(type.self, i, promoted_decoration);
+		}
 	}
 }
 
@@ -13841,6 +13965,8 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
 
 	if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
 		res += "shared ";
+	else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT)
+		res += "taskPayloadSharedEXT ";
 
 	res += to_interpolation_qualifiers(flags);
 	if (var)
@@ -15998,6 +16124,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		statement("terminateRayEXT;");
 		break;
 
+	case SPIRBlock::EmitMeshTasks:
+		statement("EmitMeshTasksEXT(",
+		          to_unpacked_expression(block.mesh.groups[0]), ", ",
+		          to_unpacked_expression(block.mesh.groups[1]), ", ",
+		          to_unpacked_expression(block.mesh.groups[2]), ");");
+		break;
+
 	default:
 		SPIRV_CROSS_THROW("Unimplemented block terminator.");
 	}
@@ -16326,6 +16459,9 @@ void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr
 	case BuiltInIncomingRayFlagsNV:
 	case BuiltInLaunchIdNV:
 	case BuiltInLaunchSizeNV:
+	case BuiltInPrimitiveTriangleIndicesEXT:
+	case BuiltInPrimitiveLineIndicesEXT:
+	case BuiltInPrimitivePointIndicesEXT:
 		expected_type = SPIRType::UInt;
 		break;
 

+ 8 - 2
3rdparty/spirv-cross/spirv_glsl.hpp

@@ -145,6 +145,12 @@ public:
 		// compares.
 		bool relax_nan_checks = false;
 
+		// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
+		// To load these types correctly, we must generate a wrapper. them in a dummy function which only purpose is to
+		// ensure row_major decoration is actually respected.
+		// This workaround may cause significant performance degeneration on some Android devices.
+		bool enable_row_major_load_workaround = true;
+
 		// If non-zero, controls layout(num_views = N) in; in GL_OVR_multiview2.
 		uint32_t ovr_multiview_view_count = 0;
 
@@ -622,7 +628,7 @@ protected:
 	void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration);
 	void emit_buffer_block_legacy(const SPIRVariable &var);
 	void emit_buffer_block_flattened(const SPIRVariable &type);
-	void fixup_implicit_builtin_block_names();
+	void fixup_implicit_builtin_block_names(spv::ExecutionModel model);
 	void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model);
 	bool should_force_emit_builtin_block(spv::StorageClass storage);
 	void emit_push_constant_block_vulkan(const SPIRVariable &var);
@@ -766,7 +772,7 @@ protected:
 	std::string type_to_glsl_constructor(const SPIRType &type);
 	std::string argument_decl(const SPIRFunction::Parameter &arg);
 	virtual std::string to_qualifiers_glsl(uint32_t id);
-	void fixup_io_block_patch_qualifiers(const SPIRVariable &var);
+	void fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var);
 	void emit_output_variable_initializer(const SPIRVariable &var);
 	std::string to_precision_qualifiers_glsl(uint32_t id);
 	virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);

File diff suppressed because it is too large
+ 596 - 67
3rdparty/spirv-cross/spirv_msl.cpp


+ 55 - 21
3rdparty/spirv-cross/spirv_msl.hpp

@@ -34,34 +34,39 @@
 namespace SPIRV_CROSS_NAMESPACE
 {
 
-// Indicates the format of a shader input. Currently limited to specifying
+// Indicates the format of a shader interface variable. Currently limited to specifying
 // if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or
 // some other format.
-enum MSLShaderInputFormat
+enum MSLShaderVariableFormat
 {
-	MSL_SHADER_INPUT_FORMAT_OTHER = 0,
-	MSL_SHADER_INPUT_FORMAT_UINT8 = 1,
-	MSL_SHADER_INPUT_FORMAT_UINT16 = 2,
-	MSL_SHADER_INPUT_FORMAT_ANY16 = 3,
-	MSL_SHADER_INPUT_FORMAT_ANY32 = 4,
+	MSL_SHADER_VARIABLE_FORMAT_OTHER = 0,
+	MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1,
+	MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2,
+	MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3,
+	MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4,
 
 	// Deprecated aliases.
-	MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_INPUT_FORMAT_OTHER,
-	MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_INPUT_FORMAT_UINT8,
-	MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_INPUT_FORMAT_UINT16,
-
-	MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff
+	MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER,
+	MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8,
+	MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16,
+	MSL_SHADER_INPUT_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER,
+	MSL_SHADER_INPUT_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8,
+	MSL_SHADER_INPUT_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16,
+	MSL_SHADER_INPUT_FORMAT_ANY16 = MSL_SHADER_VARIABLE_FORMAT_ANY16,
+	MSL_SHADER_INPUT_FORMAT_ANY32 = MSL_SHADER_VARIABLE_FORMAT_ANY32,
+
+	MSL_SHADER_VARIABLE_FORMAT_INT_MAX = 0x7fffffff
 };
 
-// Defines MSL characteristics of an input variable at a particular location.
+// Defines MSL characteristics of a shader interface variable at a particular location.
 // After compilation, it is possible to query whether or not this location was used.
 // If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader,
 // or behavior is undefined.
-struct MSLShaderInput
+struct MSLShaderInterfaceVariable
 {
 	uint32_t location = 0;
 	uint32_t component = 0;
-	MSLShaderInputFormat format = MSL_SHADER_INPUT_FORMAT_OTHER;
+	MSLShaderVariableFormat format = MSL_SHADER_VARIABLE_FORMAT_OTHER;
 	spv::BuiltIn builtin = spv::BuiltInMax;
 	uint32_t vecsize = 0;
 };
@@ -539,10 +544,15 @@ public:
 	explicit CompilerMSL(const ParsedIR &ir);
 	explicit CompilerMSL(ParsedIR &&ir);
 
-	// input is a shader input description used to fix up shader input variables.
+	// input is a shader interface variable description used to fix up shader input variables.
 	// If shader inputs are provided, is_msl_shader_input_used() will return true after
-	// calling ::compile() if the location was used by the MSL code.
-	void add_msl_shader_input(const MSLShaderInput &input);
+	// calling ::compile() if the location were used by the MSL code.
+	void add_msl_shader_input(const MSLShaderInterfaceVariable &input);
+
+	// output is a shader interface variable description used to fix up shader output variables.
+	// If shader outputs are provided, is_msl_shader_output_used() will return true after
+	// calling ::compile() if the location were used by the MSL code.
+	void add_msl_shader_output(const MSLShaderInterfaceVariable &output);
 
 	// resource is a resource binding to indicate the MSL buffer,
 	// texture or sampler index to use for a particular SPIR-V description set
@@ -577,6 +587,9 @@ public:
 	// Query after compilation is done. This allows you to check if an input location was used by the shader.
 	bool is_msl_shader_input_used(uint32_t location);
 
+	// Query after compilation is done. This allows you to check if an output location were used by the shader.
+	bool is_msl_shader_output_used(uint32_t location);
+
 	// If not using add_msl_shader_input, it's possible
 	// that certain builtin attributes need to be automatically assigned locations.
 	// This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc.
@@ -584,6 +597,13 @@ public:
 	// add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]].
 	uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const;
 
+	// If not using add_msl_shader_output, it's possible
+	// that certain builtin attributes need to be automatically assigned locations.
+	// This is typical for tessellation builtin outputs such as tess levels, gl_Position, etc.
+	// This returns k_unknown_location if the location were explicitly assigned with
+	// add_msl_shader_output or the builtin were not used, otherwise returns N in [[attribute(N)]].
+	uint32_t get_automatic_builtin_output_location(spv::BuiltIn builtin) const;
+
 	// NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here.
 	// Constexpr samplers are always assumed to be emitted.
 	// No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped
@@ -665,6 +685,7 @@ protected:
 		SPVFuncImplQuantizeToF16,
 		SPVFuncImplCubemapTo2DArrayFace,
 		SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type
+		SPVFuncImplStorageMatrix, // Allow threadgroup construction of matrices
 		SPVFuncImplInverse4x4,
 		SPVFuncImplInverse3x3,
 		SPVFuncImplInverse2x2,
@@ -718,6 +739,8 @@ protected:
 	// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
 	// Use Metal's native frame-buffer fetch API for subpass inputs.
 	void emit_texture_op(const Instruction &i, bool sparse) override;
+	void emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	std::string to_ptr_expression(uint32_t id, bool register_expression_read = true);
 	void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
 	void emit_instruction(const Instruction &instr) override;
 	void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
@@ -736,6 +759,7 @@ protected:
 	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
 	                        const std::string &qualifier = "", uint32_t base_offset = 0) override;
 	void emit_struct_padding_target(const SPIRType &type) override;
+	std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member);
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 	void emit_block_hints(const SPIRBlock &block) override;
 
@@ -796,6 +820,7 @@ protected:
 	void extract_global_variables_from_functions();
 	void mark_packable_structs();
 	void mark_as_packable(SPIRType &type);
+	void mark_as_workgroup_struct(SPIRType &type);
 
 	std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars;
 	void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
@@ -891,6 +916,8 @@ protected:
 	uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const;
 	uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin,
 	                                                       uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);
+	uint32_t get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin,
+	                                                        uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);
 
 	uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const;
 
@@ -1001,12 +1028,17 @@ protected:
 	Options msl_options;
 	std::set<SPVFuncImpl> spv_function_implementations;
 	// Must be ordered to ensure declarations are in a specific order.
-	std::map<LocationComponentPair, MSLShaderInput> inputs_by_location;
-	std::unordered_map<uint32_t, MSLShaderInput> inputs_by_builtin;
+	std::map<LocationComponentPair, MSLShaderInterfaceVariable> inputs_by_location;
+	std::unordered_map<uint32_t, MSLShaderInterfaceVariable> inputs_by_builtin;
+	std::map<LocationComponentPair, MSLShaderInterfaceVariable> outputs_by_location;
+	std::unordered_map<uint32_t, MSLShaderInterfaceVariable> outputs_by_builtin;
 	std::unordered_set<uint32_t> location_inputs_in_use;
 	std::unordered_set<uint32_t> location_inputs_in_use_fallback;
+	std::unordered_set<uint32_t> location_outputs_in_use;
+	std::unordered_set<uint32_t> location_outputs_in_use_fallback;
 	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
+	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
 	std::set<std::string> pragma_lines;
 	std::set<std::string> typedef_lines;
 	SmallVector<uint32_t> vars_needing_early_declaration;
@@ -1075,7 +1107,9 @@ protected:
 	const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const;
 
 	std::unordered_set<uint32_t> buffers_requiring_array_length;
-	SmallVector<uint32_t> buffer_arrays;
+	SmallVector<uint32_t> buffer_arrays_discrete;
+	SmallVector<std::pair<uint32_t, uint32_t>> buffer_aliases_argument;
+	SmallVector<uint32_t> buffer_aliases_discrete;
 	std::unordered_set<uint32_t> atomic_image_vars; // Emulate texture2D atomic operations
 	std::unordered_set<uint32_t> pull_model_inputs;
 

+ 25 - 0
3rdparty/spirv-cross/spirv_parser.cpp

@@ -183,6 +183,15 @@ void Parser::parse(const Instruction &instruction)
 	auto op = static_cast<Op>(instruction.op);
 	uint32_t length = instruction.length;
 
+	// HACK for glslang that might emit OpEmitMeshTasksEXT followed by return / branch.
+	// Instead of failing hard, just ignore it.
+	if (ignore_trailing_block_opcodes)
+	{
+		ignore_trailing_block_opcodes = false;
+		if (op == OpReturn || op == OpBranch || op == OpUnreachable)
+			return;
+	}
+
 	switch (op)
 	{
 	case OpSourceContinued:
@@ -349,6 +358,10 @@ void Parser::parse(const Instruction &instruction)
 			execution.output_vertices = ops[2];
 			break;
 
+		case ExecutionModeOutputPrimitivesEXT:
+			execution.output_primitives = ops[2];
+			break;
+
 		default:
 			break;
 		}
@@ -1103,6 +1116,18 @@ void Parser::parse(const Instruction &instruction)
 		current_block = nullptr;
 		break;
 
+	case OpEmitMeshTasksEXT:
+		if (!current_block)
+			SPIRV_CROSS_THROW("Trying to end a non-existing block.");
+		current_block->terminator = SPIRBlock::EmitMeshTasks;
+		for (uint32_t i = 0; i < 3; i++)
+			current_block->mesh.groups[i] = ops[i];
+		current_block->mesh.payload = length >= 4 ? ops[3] : 0;
+		current_block = nullptr;
+		// Currently glslang is bugged and does not treat EmitMeshTasksEXT as a terminator.
+		ignore_trailing_block_opcodes = true;
+		break;
+
 	case OpReturn:
 	{
 		if (!current_block)

+ 2 - 0
3rdparty/spirv-cross/spirv_parser.hpp

@@ -46,6 +46,8 @@ private:
 	ParsedIR ir;
 	SPIRFunction *current_function = nullptr;
 	SPIRBlock *current_block = nullptr;
+	// For workarounds.
+	bool ignore_trailing_block_opcodes = false;
 
 	void parse(const Instruction &instr);
 	const uint32_t *stream(const Instruction &instr) const;

Some files were not shown because too many files changed in this diff