|
|
@@ -58,7 +58,7 @@ CompilerMSL::CompilerMSL(ParsedIR &&ir_)
|
|
|
|
|
|
// Records the shader-input description `si` in the compiler's lookup tables.
// The '-' row below is the pre-patch form, which keyed inputs_by_location by
// si.location alone; the '+' row keys it by the (si.location, si.component)
// pair. Storing under a key that is already present overwrites that entry.
void CompilerMSL::add_msl_shader_input(const MSLShaderInput &si)
|
|
|
{
|
|
|
- inputs_by_location[si.location] = si;
|
|
|
+ inputs_by_location[{si.location, si.component}] = si;
|
|
|
// Only the first input registered for a given builtin is stored in
// inputs_by_builtin; later ones for the same builtin are ignored here.
// NOTE(review): BuiltInMax presumably means "not a builtin" - confirm.
if (si.builtin != BuiltInMax && !inputs_by_builtin.count(si.builtin))
|
|
|
inputs_by_builtin[si.builtin] = si;
|
|
|
}
|
|
|
@@ -1462,9 +1462,9 @@ string CompilerMSL::compile()
|
|
|
|
|
|
emit_header();
|
|
|
emit_custom_templates();
|
|
|
+ emit_custom_functions();
|
|
|
emit_specialization_constants_and_structs();
|
|
|
emit_resources();
|
|
|
- emit_custom_functions();
|
|
|
emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
|
|
|
|
|
|
pass_count++;
|
|
|
@@ -2224,9 +2224,10 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
|
|
|
if (get_decoration_bitset(var.self).get(DecorationLocation))
|
|
|
{
|
|
|
uint32_t locn = get_decoration(var.self, DecorationLocation);
|
|
|
+ uint32_t comp = get_decoration(var.self, DecorationComponent);
|
|
|
if (storage == StorageClassInput)
|
|
|
{
|
|
|
- type_id = ensure_correct_input_type(var.basetype, locn, 0, meta.strip_array);
|
|
|
+ type_id = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array);
|
|
|
var.basetype = type_id;
|
|
|
|
|
|
type_id = get_pointee_type_id(type_id);
|
|
|
@@ -2238,6 +2239,8 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
|
|
|
ib_type.member_types[ib_mbr_idx] = type_id;
|
|
|
}
|
|
|
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
|
|
|
+ if (comp)
|
|
|
+ set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
|
|
|
mark_location_as_used_by_shader(locn, get<SPIRType>(type_id), storage);
|
|
|
}
|
|
|
else if (is_builtin && is_tessellation_shader() && inputs_by_builtin.count(builtin))
|
|
|
@@ -2393,8 +2396,8 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage
|
|
|
uint32_t comp = get_decoration(var.self, DecorationComponent);
|
|
|
if (storage == StorageClassInput)
|
|
|
{
|
|
|
- var.basetype = ensure_correct_input_type(var.basetype, locn, 0, meta.strip_array);
|
|
|
- uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn, 0, meta.strip_array);
|
|
|
+ var.basetype = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array);
|
|
|
+ uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn, comp, 0, meta.strip_array);
|
|
|
if (storage == StorageClassInput && pull_model_inputs.count(var.self))
|
|
|
ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
|
|
|
else
|
|
|
@@ -2739,9 +2742,10 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
|
|
|
if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
|
|
|
{
|
|
|
uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation);
|
|
|
+ uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
|
|
|
if (storage == StorageClassInput)
|
|
|
{
|
|
|
- mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, 0, meta.strip_array);
|
|
|
+ mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, comp, 0, meta.strip_array);
|
|
|
var_type.member_types[mbr_idx] = mbr_type_id;
|
|
|
if (storage == StorageClassInput && pull_model_inputs.count(var.self))
|
|
|
ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
|
|
|
@@ -2758,7 +2762,7 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
|
|
|
uint32_t locn = get_accumulated_member_location(var, mbr_idx, meta.strip_array);
|
|
|
if (storage == StorageClassInput)
|
|
|
{
|
|
|
- mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, 0, meta.strip_array);
|
|
|
+ mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, 0, 0, meta.strip_array);
|
|
|
var_type.member_types[mbr_idx] = mbr_type_id;
|
|
|
if (storage == StorageClassInput && pull_model_inputs.count(var.self))
|
|
|
ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
|
|
|
@@ -3602,7 +3606,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
|
|
|
// the struct containing them is the correct size and layout.
|
|
|
for (auto &input : inputs_by_location)
|
|
|
{
|
|
|
- if (location_inputs_in_use.count(input.first) != 0)
|
|
|
+ if (location_inputs_in_use.count(input.first.location) != 0)
|
|
|
continue;
|
|
|
|
|
|
// Create a fake variable to put at the location.
|
|
|
@@ -3642,7 +3646,10 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
|
|
|
ptr_type.self = array_type_id;
|
|
|
|
|
|
auto &fake_var = set<SPIRVariable>(var_id, ptr_type_id, storage);
|
|
|
- set_decoration(var_id, DecorationLocation, input.first);
|
|
|
+ set_decoration(var_id, DecorationLocation, input.first.location);
|
|
|
+ if (input.first.component)
|
|
|
+ set_decoration(var_id, DecorationComponent, input.first.component);
|
|
|
+
|
|
|
meta.strip_array = true;
|
|
|
meta.allow_local_declaration = false;
|
|
|
add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta);
|
|
|
@@ -3794,7 +3801,7 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil
|
|
|
// Ensure that the type is compatible with the shader input.
|
|
|
// If it is, simply return the given type ID.
|
|
|
// Otherwise, create a new type, and return its ID.
|
|
|
-uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components, bool strip_array)
|
|
|
+uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, uint32_t num_components, bool strip_array)
|
|
|
{
|
|
|
auto &type = get<SPIRType>(type_id);
|
|
|
|
|
|
@@ -3804,7 +3811,7 @@ uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t locat
|
|
|
if (type.basetype == SPIRType::Struct || type.array.size() > max_array_dimensions)
|
|
|
return type_id;
|
|
|
|
|
|
- auto p_va = inputs_by_location.find(location);
|
|
|
+ auto p_va = inputs_by_location.find({location, component});
|
|
|
if (p_va == end(inputs_by_location))
|
|
|
{
|
|
|
if (num_components > type.vecsize)
|
|
|
@@ -5001,6 +5008,24 @@ void CompilerMSL::emit_custom_functions()
|
|
|
statement("");
|
|
|
break;
|
|
|
|
|
|
+ case SPVFuncImplQuantizeToF16:
|
|
|
+ // Ensure fast-math is disabled to match Vulkan results.
|
|
|
+ // SpvHalfTypeSelector is used to match the half* template type to the float* template type.
|
|
|
+ // Depending on GPU, MSL does not always flush converted subnormal halfs to zero,
|
|
|
+ // as required by OpQuantizeToF16, so check for subnormals and flush them to zero.
|
|
|
+ statement("template <typename F> struct SpvHalfTypeSelector;");
|
|
|
+ statement("template <> struct SpvHalfTypeSelector<float> { public: using H = half; };");
|
|
|
+ statement("template<uint N> struct SpvHalfTypeSelector<vec<float, N>> { using H = vec<half, N>; };");
|
|
|
+ statement("template<typename F, typename H = typename SpvHalfTypeSelector<F>::H>");
|
|
|
+ statement("[[clang::optnone]] F spvQuantizeToF16(F fval)");
|
|
|
+ begin_scope();
|
|
|
+ statement("H hval = H(fval);");
|
|
|
+ statement("hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval));");
|
|
|
+ statement("return F(hval);");
|
|
|
+ end_scope();
|
|
|
+ statement("");
|
|
|
+ break;
|
|
|
+
|
|
|
// Emulate texturecube_array with texture2d_array for iOS where this type is not available
|
|
|
case SPVFuncImplCubemapTo2DArrayFace:
|
|
|
statement(force_inline);
|
|
|
@@ -8064,28 +8089,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
|
|
uint32_t result_type = ops[0];
|
|
|
uint32_t id = ops[1];
|
|
|
uint32_t arg = ops[2];
|
|
|
-
|
|
|
- string exp;
|
|
|
- auto &type = get<SPIRType>(result_type);
|
|
|
-
|
|
|
- switch (type.vecsize)
|
|
|
- {
|
|
|
- case 1:
|
|
|
- exp = join("float(half(", to_expression(arg), "))");
|
|
|
- break;
|
|
|
- case 2:
|
|
|
- exp = join("float2(half2(", to_expression(arg), "))");
|
|
|
- break;
|
|
|
- case 3:
|
|
|
- exp = join("float3(half3(", to_expression(arg), "))");
|
|
|
- break;
|
|
|
- case 4:
|
|
|
- exp = join("float4(half4(", to_expression(arg), "))");
|
|
|
- break;
|
|
|
- default:
|
|
|
- SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
|
|
|
- }
|
|
|
-
|
|
|
+ string exp = join("spvQuantizeToF16(", to_expression(arg), ")");
|
|
|
emit_op(result_type, id, exp, should_forward(arg));
|
|
|
break;
|
|
|
}
|
|
|
@@ -13338,31 +13342,11 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type)
|
|
|
|
|
|
string CompilerMSL::constant_op_expression(const SPIRConstantOp &cop)
|
|
|
{
|
|
|
- auto &type = get<SPIRType>(cop.basetype);
|
|
|
- string op;
|
|
|
-
|
|
|
switch (cop.opcode)
|
|
|
{
|
|
|
case OpQuantizeToF16:
|
|
|
- switch (type.vecsize)
|
|
|
- {
|
|
|
- case 1:
|
|
|
- op = "float(half(";
|
|
|
- break;
|
|
|
- case 2:
|
|
|
- op = "float2(half2(";
|
|
|
- break;
|
|
|
- case 3:
|
|
|
- op = "float3(half3(";
|
|
|
- break;
|
|
|
- case 4:
|
|
|
- op = "float4(half4(";
|
|
|
- break;
|
|
|
- default:
|
|
|
- SPIRV_CROSS_THROW("Illegal argument to OpSpecConstantOp QuantizeToF16.");
|
|
|
- }
|
|
|
- return join(op, to_expression(cop.arguments[0]), "))");
|
|
|
-
|
|
|
+ add_spv_func_and_recompile(SPVFuncImplQuantizeToF16);
|
|
|
+ return join("spvQuantizeToF16(", to_expression(cop.arguments[0]), ")");
|
|
|
default:
|
|
|
return CompilerGLSL::constant_op_expression(cop);
|
|
|
}
|
|
|
@@ -14536,11 +14520,11 @@ SPIRType CompilerMSL::get_presumed_input_type(const SPIRType &ib_type, uint32_t
|
|
|
{
|
|
|
SPIRType type = get_physical_member_type(ib_type, index);
|
|
|
uint32_t loc = get_member_decoration(ib_type.self, index, DecorationLocation);
|
|
|
- if (inputs_by_location.count(loc))
|
|
|
- {
|
|
|
- if (inputs_by_location.at(loc).vecsize > type.vecsize)
|
|
|
- type.vecsize = inputs_by_location.at(loc).vecsize;
|
|
|
- }
|
|
|
+ uint32_t cmp = get_member_decoration(ib_type.self, index, DecorationComponent);
|
|
|
+ auto p_va = inputs_by_location.find({loc, cmp});
|
|
|
+ if (p_va != end(inputs_by_location) && p_va->second.vecsize > type.vecsize)
|
|
|
+ type.vecsize = p_va->second.vecsize;
|
|
|
+
|
|
|
return type;
|
|
|
}
|
|
|
|
|
|
@@ -15054,6 +15038,9 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
|
|
|
}
|
|
|
break;
|
|
|
|
|
|
+ case OpQuantizeToF16:
|
|
|
+ return SPVFuncImplQuantizeToF16;
|
|
|
+
|
|
|
case OpTypeArray:
|
|
|
{
|
|
|
// Allow Metal to use the array<T> template to make arrays a value type
|