@@ -30,6 +30,7 @@
 #include <limits>
 #include <locale.h>
 #include <utility>
+#include <array>
 
 #ifndef _WIN32
 #include <langinfo.h>
@@ -1521,7 +1522,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits
 {
 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
 	// and is 64-bit.
-	if (type.storage == StorageClassPhysicalStorageBufferEXT)
+	if (type_is_top_level_physical_pointer(type))
 	{
 		if (!type.pointer)
 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
@@ -1536,8 +1537,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits
 		else
 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
 	}
-
-	if (!type.array.empty())
+	else if (type_is_top_level_array(type))
 	{
 		uint32_t minimum_alignment = 1;
 		if (packing_is_vec4_padded(packing))
@@ -1643,21 +1643,9 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const B
 
 uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
 {
-	if (!type.array.empty())
-	{
-		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
-
-		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
-		// so that it is possible to pack other vectors into the last element.
-		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
-			packed_size -= (4 - type.vecsize) * (type.width / 8);
-
-		return packed_size;
-	}
-
 	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
 	// and is 64-bit.
-	if (type.storage == StorageClassPhysicalStorageBufferEXT)
+	if (type_is_top_level_physical_pointer(type))
 	{
 		if (!type.pointer)
 			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
@@ -1667,6 +1655,17 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f
 		else
 			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
 	}
+	else if (type_is_top_level_array(type))
+	{
+		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
+
+		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
+		// so that it is possible to pack other vectors into the last element.
+		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
+			packed_size -= (4 - type.vecsize) * (type.width / 8);
+
+		return packed_size;
+	}
 
 	uint32_t size = 0;
 
@@ -1834,8 +1833,9 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
 		}
 
 		// Verify array stride rules.
-		if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
-		                                    type_struct_member_array_stride(type, i))
+		if (type_is_top_level_array(memb_type) &&
+		    type_to_packed_array_stride(memb_type, member_flags, packing) !=
+		    type_struct_member_array_stride(type, i))
 		{
 			if (failed_validation_index)
 				*failed_validation_index = i;
@@ -3993,6 +3993,169 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
 	}
 }
 
+void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
+{
+	std::string result;
+	switch (group_op)
+	{
+	case GroupOperationReduce:
+		result = "reduction";
+		break;
+
+	case GroupOperationExclusiveScan:
+		result = "excl_scan";
+		break;
+
+	case GroupOperationInclusiveScan:
+		result = "incl_scan";
+		break;
+
+	default:
+		SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
+	}
+
+	struct TypeInfo
+	{
+		std::string type;
+		std::string identity;
+	};
+
+	std::vector<TypeInfo> type_infos;
+	switch (op)
+	{
+	case OpGroupNonUniformIAdd:
+	{
+		type_infos.emplace_back(TypeInfo{ "uint", "0u" });
+		type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" });
+		type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" });
+		type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" });
+		type_infos.emplace_back(TypeInfo{ "int", "0" });
+		type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" });
+		type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" });
+		type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" });
+		break;
+	}
+
+	case OpGroupNonUniformFAdd:
+	{
+		type_infos.emplace_back(TypeInfo{ "float", "0.0f" });
+		type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" });
+		type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" });
+		type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" });
+		// ARB_gpu_shader_fp64 is required in GL 4.0, which in turn is required by NV_shader_thread_shuffle.
+		type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
+ type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(0.0LF)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" });
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ case OpGroupNonUniformIMul:
|
|
|
+ {
|
|
|
+ type_infos.emplace_back(TypeInfo{ "uint", "1u" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "int", "1" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" });
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ case OpGroupNonUniformFMul:
|
|
|
+ {
|
|
|
+ type_infos.emplace_back(TypeInfo{ "float", "1.0f" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" });
|
|
|
+ type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" });
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ default:
|
|
|
+ SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
|
|
|
+ }
|
|
|
+
|
|
|
+ const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
|
|
|
+ const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
|
|
|
+ std::string op_symbol;
|
|
|
+ if (op_is_addition)
|
|
|
+ {
|
|
|
+ op_symbol = "+=";
|
|
|
+ }
|
|
|
+ else if (op_is_multiplication)
|
|
|
+ {
|
|
|
+ op_symbol = "*=";
|
|
|
+ }
|
|
|
+
|
|
|
+ for (const TypeInfo &t : type_infos)
|
|
|
+ {
|
|
|
+ statement(t.type, " ", func, "(", t.type, " v)");
|
|
|
+ begin_scope();
|
|
|
+ statement(t.type, " ", result, " = ", t.identity, ";");
|
|
|
+ statement("uvec4 active_threads = subgroupBallot(true);");
|
|
|
+ statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
|
|
|
+ begin_scope();
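+		// Fast path: every invocation in the subgroup is active, so a
+		// log2(gl_SubgroupSize) shuffle pass is enough to combine all lanes.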
+		statement("uint total = gl_SubgroupSize / 2u;");
+		statement(result, " = v;");
+		statement("for (uint i = 1u; i <= total; i <<= 1u)");
+		begin_scope();
+		statement("bool valid;");
+		if (group_op == GroupOperationReduce)
+		{
+			statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);");
+		}
+		else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
+		{
+			statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);");
+		}
+		if (op_is_addition || op_is_multiplication)
+		{
+			statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
+		}
+		end_scope();
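+		// An exclusive scan is the inclusive scan shifted up by one lane,
+		// with the identity value seeded into lane 0.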
+		if (group_op == GroupOperationExclusiveScan)
+		{
+			statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);");
+			statement("if (subgroupElect())");
+			begin_scope();
+			statement(result, " = ", t.identity, ";");
+			end_scope();
+		}
+		end_scope();
+		statement("else");
+		begin_scope();
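+		// Slow path: some lanes are inactive, so fall back to a linear loop over
+		// all lanes, masking out contributions from inactive ones via the ballot.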
+		if (group_op == GroupOperationExclusiveScan)
+		{
+			statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
+		}
+		else if (group_op == GroupOperationInclusiveScan)
+		{
+			statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
+		}
+		statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)");
+		begin_scope();
+		statement("bool valid = subgroupBallotBitExtract(active_threads, i);");
+		statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);");
+		if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
+		{
+			statement("valid = valid && (i < total);");
+		}
+		if (op_is_addition || op_is_multiplication)
+		{
+			statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
+		}
+		end_scope();
+		end_scope();
+		statement("return ", result, ";");
+		end_scope();
+	}
+}
+
 void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 {
 	static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
@@ -4396,6 +4559,57 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 			statement("#endif");
 			statement("");
 		}
+
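+		// For each requested arithmetic feature, emit the wrapper functions inside
+		// the extension #if/#elif chain. KHR_shader_subgroup_arithmetic provides
+		// these functions natively, so only NV_shader_thread_shuffle needs emulation.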
+		auto arithmetic_feature_helper =
+		    [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op)
+		{
+			if (shader_subgroup_supporter.is_feature_requested(feat))
+			{
+				auto exts = Supp::get_candidates_for_feature(feat, result);
+				for (auto &e : exts)
+				{
+					const char *name = Supp::get_extension_name(e);
+					statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
+
+					switch (e)
+					{
+					case Supp::NV_shader_thread_shuffle:
+						emit_subgroup_arithmetic_workaround(func_name, op, group_op);
+						break;
+					default:
+						break;
+					}
+				}
+				statement("#endif");
+				statement("");
+			}
+		};
+
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
+		                          OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
+		                          OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
+		                          OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
+		                          OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
+
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
+		                          OpGroupNonUniformIMul, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
+		                          OpGroupNonUniformIMul, GroupOperationInclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
+		                          GroupOperationReduce);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
+		                          OpGroupNonUniformFMul, GroupOperationExclusiveScan);
+		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
+		                          OpGroupNonUniformFMul, GroupOperationInclusiveScan);
 	}
 
 	if (!workaround_ubo_load_overload_types.empty())
@@ -7109,7 +7323,7 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s
 	}
 }
 
-bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
+bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops)
 {
 	switch (op)
 	{
@@ -7128,6 +7342,22 @@ bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
 	case OpGroupNonUniformBallotBitExtract:
 	case OpGroupNonUniformInverseBallot:
 		return true;
+	case OpGroupNonUniformIAdd:
+	case OpGroupNonUniformFAdd:
+	case OpGroupNonUniformIMul:
+	case OpGroupNonUniformFMul:
+	{
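+		// ops[3] is the GroupOperation literal operand of these instructions.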
+		const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
+		if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
+		    operation == GroupOperationExclusiveScan)
+		{
+			return true;
+		}
+		else
+		{
+			return false;
+		}
+	}
 	default:
 		return false;
 	}
@@ -7522,7 +7752,7 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
 bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
 {
 	auto &type = expression_type(ptr);
-	if (type.array.empty())
+	if (!type_is_top_level_array(get_pointee_type(type)))
 		return false;
 
 	if (!backend.array_is_value_type)
@@ -8725,7 +8955,7 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
 	const uint32_t *ops = stream(i);
 	auto op = static_cast<Op>(i.op);
 
-	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
+	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
 		SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
 
 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
@@ -8793,12 +9023,34 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
 		}
 		break;
 
-	case OpGroupNonUniformFAdd:
-	case OpGroupNonUniformFMul:
+	// clang-format off
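+	// Each op/group-operation pair maps to its own subgroup feature request,
+	// so generate the four dispatch cases with a macro to keep them in sync.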
+#define GLSL_GROUP_OP(OP)\
+	case OpGroupNonUniform##OP:\
+	{\
+		auto operation = static_cast<GroupOperation>(ops[3]);\
+		if (operation == GroupOperationClusteredReduce)\
+			require_extension_internal("GL_KHR_shader_subgroup_clustered");\
+		else if (operation == GroupOperationReduce)\
+			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
+		else if (operation == GroupOperationExclusiveScan)\
+			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
+		else if (operation == GroupOperationInclusiveScan)\
+			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
+		else\
+			SPIRV_CROSS_THROW("Invalid group operation.");\
+		break;\
+	}
+
+	GLSL_GROUP_OP(IAdd)
+	GLSL_GROUP_OP(FAdd)
+	GLSL_GROUP_OP(IMul)
+	GLSL_GROUP_OP(FMul)
+
+#undef GLSL_GROUP_OP
+	// clang-format on
+
 	case OpGroupNonUniformFMin:
 	case OpGroupNonUniformFMax:
-	case OpGroupNonUniformIAdd:
-	case OpGroupNonUniformIMul:
 	case OpGroupNonUniformSMin:
 	case OpGroupNonUniformSMax:
 	case OpGroupNonUniformUMin:
@@ -17667,6 +17919,7 @@ const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candid
 	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
 		                                                "GL_KHR_shader_subgroup_basic",
 		                                                "GL_KHR_shader_subgroup_vote",
+		                                                "GL_KHR_shader_subgroup_arithmetic",
 		                                                "GL_NV_gpu_shader_5",
 		                                                "GL_NV_shader_thread_group",
 		                                                "GL_NV_shader_thread_shuffle",
@@ -17715,6 +17968,21 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSub
 		return { SubgroupMask };
 	case SubgroupBallotBitCount:
 		return { SubgroupBallot };
+	case SubgroupArithmeticIAddReduce:
+	case SubgroupArithmeticIAddInclusiveScan:
+	case SubgroupArithmeticFAddReduce:
+	case SubgroupArithmeticFAddInclusiveScan:
+	case SubgroupArithmeticIMulReduce:
+	case SubgroupArithmeticIMulInclusiveScan:
+	case SubgroupArithmeticFMulReduce:
+	case SubgroupArithmeticFMulInclusiveScan:
+		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
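+	// Exclusive scans additionally depend on SubgroupElect: the emulation uses
+	// subgroupElect() to seed lane 0 with the identity value.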
+	case SubgroupArithmeticIAddExclusiveScan:
+	case SubgroupArithmeticFAddExclusiveScan:
+	case SubgroupArithmeticIMulExclusiveScan:
+	case SubgroupArithmeticFMulExclusiveScan:
+		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
+		         SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
 	default:
 		return {};
 	}
@@ -17728,11 +17996,15 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgr
 
 bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
 {
-	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
-		                                       true, // SubgroupBalloFindLSB_MSB
-		                                       false, false, false, false,
-		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
-		                                       false, false, true, false };
+	static const bool retval[FeatureCount] = {
+		false, false, false, false, false, false,
+		true, // SubgroupBallotFindLSB_MSB
+		false, false, false, false,
+		true, // SubgroupMemBarrier - replaced with workgroup memory barriers
+		false, false, true, false,
+		false, false, false, false, false, false, // iadd, fadd
+		false, false, false, false, false, false, // imul, fmul
+	};
 
 	return retval[feature];
 }
@@ -17744,7 +18016,11 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgrou
 		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
 		KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
 		KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
-		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
+		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
+		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
 	};
 
 	return extensions[feature];
@@ -17840,6 +18116,19 @@ CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderS
 		return { NV_shader_thread_group };
 	case SubgroupBallotBitCount:
 		return {};
+	case SubgroupArithmeticIAddReduce:
+	case SubgroupArithmeticIAddExclusiveScan:
+	case SubgroupArithmeticIAddInclusiveScan:
+	case SubgroupArithmeticFAddReduce:
+	case SubgroupArithmeticFAddExclusiveScan:
+	case SubgroupArithmeticFAddInclusiveScan:
+	case SubgroupArithmeticIMulReduce:
+	case SubgroupArithmeticIMulExclusiveScan:
+	case SubgroupArithmeticIMulInclusiveScan:
+	case SubgroupArithmeticFMulReduce:
+	case SubgroupArithmeticFMulExclusiveScan:
+	case SubgroupArithmeticFMulInclusiveScan:
+		return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
 	default:
 		return {};
 	}
@@ -17864,6 +18153,7 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
 	weights[KHR_shader_subgroup_ballot] = big_num;
 	weights[KHR_shader_subgroup_basic] = big_num;
 	weights[KHR_shader_subgroup_vote] = big_num;
+	weights[KHR_shader_subgroup_arithmetic] = big_num;
 }
 
 void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)