forked from suyu/suyu
glsl: F16x2 storage atomics
This commit is contained in:
parent
11ba190462
commit
9cc1b8a873
7 changed files with 64 additions and 58 deletions
|
@ -39,6 +39,10 @@ void EmitContext::SetupExtensions(std::string& header) {
|
||||||
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
|
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
|
||||||
header += "#extension NV_shader_atomic_fp16_vector : enable\n";
|
header += "#extension NV_shader_atomic_fp16_vector : enable\n";
|
||||||
}
|
}
|
||||||
|
if (info.uses_fp16) {
|
||||||
|
// TODO: AMD
|
||||||
|
header += "#extension GL_NV_gpu_shader5 : enable\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineConstantBuffers() {
|
void EmitContext::DefineConstantBuffers() {
|
||||||
|
@ -89,6 +93,18 @@ void EmitContext::DefineHelperFunctions() {
|
||||||
code += "uint CasFloatMax32x2(uint op_a,uint op_b){return "
|
code += "uint CasFloatMax32x2(uint op_a,uint op_b){return "
|
||||||
"packHalf2x16(max(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n";
|
"packHalf2x16(max(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n";
|
||||||
}
|
}
|
||||||
|
if (info.uses_atomic_f16x2_add) {
|
||||||
|
code += "uint CasFloatAdd16x2(uint op_a,uint op_b){return "
|
||||||
|
"packFloat2x16(unpackFloat2x16(op_a)+unpackFloat2x16(op_b));}\n";
|
||||||
|
}
|
||||||
|
if (info.uses_atomic_f16x2_min) {
|
||||||
|
code += "uint CasFloatMin16x2(uint op_a,uint op_b){return "
|
||||||
|
"packFloat2x16(min(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n";
|
||||||
|
}
|
||||||
|
if (info.uses_atomic_f16x2_max) {
|
||||||
|
code += "uint CasFloatMax16x2(uint op_a,uint op_b){return "
|
||||||
|
"packFloat2x16(max(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n";
|
||||||
|
}
|
||||||
// TODO: Track this usage
|
// TODO: Track this usage
|
||||||
code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
|
code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
|
||||||
code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
|
code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
|
||||||
|
|
|
@ -43,6 +43,11 @@ public:
|
||||||
Add<Type::U1>(format_str, inst, args...);
|
Add<Type::U1>(format_str, inst, args...);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) {
|
||||||
|
Add<Type::F16x2>(format_str, inst, args...);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) {
|
void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) {
|
||||||
Add<Type::U32>(format_str, inst, args...);
|
Add<Type::U32>(format_str, inst, args...);
|
||||||
|
|
|
@ -12,8 +12,7 @@
|
||||||
|
|
||||||
namespace Shader::Backend::GLSL {
|
namespace Shader::Backend::GLSL {
|
||||||
namespace {
|
namespace {
|
||||||
static constexpr std::string_view cas_loop{R"(
|
static constexpr std::string_view cas_loop{R"(uint {};
|
||||||
uint {};
|
|
||||||
for (;;){{
|
for (;;){{
|
||||||
uint old_value={};
|
uint old_value={};
|
||||||
{}=atomicCompSwap({},old_value,{}({},{}));
|
{}=atomicCompSwap({},old_value,{}({},{}));
|
||||||
|
@ -49,6 +48,14 @@ void CasFunctionF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
|
||||||
const auto ret{ctx.reg_alloc.Define(inst)};
|
const auto ret{ctx.reg_alloc.Define(inst)};
|
||||||
CasFunction(ctx, ret, ssbo, u32_value, function);
|
CasFunction(ctx, ret, ssbo, u32_value, function);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CasFunctionF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
const IR::Value& offset, std::string_view value, std::string_view function) {
|
||||||
|
const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())};
|
||||||
|
const std::string u32_value{fmt::format("packFloat2x16({})", value)};
|
||||||
|
const auto ret{ctx.reg_alloc.Define(inst)};
|
||||||
|
CasFunction(ctx, ret, ssbo, u32_value, function);
|
||||||
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
@ -122,11 +129,8 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
||||||
// LOG_WARNING(..., "Op falling to non-atomic");
|
// LOG_WARNING(..., "Op falling to non-atomic");
|
||||||
ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
||||||
binding.U32(), offset.U32() + 1);
|
binding.U32(), offset.U32() + 1);
|
||||||
ctx.Add(R"(
|
ctx.Add("for(int i=0;i<2;++i){{ "
|
||||||
for(int i=0;i<2;++i){{
|
"ssbo{}[{}+i]=uint(min(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}",
|
||||||
ssbo{}[{}+i]=uint(min(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));
|
|
||||||
}}
|
|
||||||
)",
|
|
||||||
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -135,12 +139,9 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
||||||
// LOG_WARNING(..., "Op falling to non-atomic");
|
// LOG_WARNING(..., "Op falling to non-atomic");
|
||||||
ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
||||||
binding.U32(), offset.U32() + 1);
|
binding.U32(), offset.U32() + 1);
|
||||||
ctx.Add(R"(
|
ctx.Add(
|
||||||
for(int i=0;i<2;++i){{
|
"for(int i=0;i<2;++i){{ ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}",
|
||||||
ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);
|
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
||||||
}}
|
|
||||||
)",
|
|
||||||
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
@ -148,11 +149,8 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
||||||
// LOG_WARNING(..., "Op falling to non-atomic");
|
// LOG_WARNING(..., "Op falling to non-atomic");
|
||||||
ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
||||||
binding.U32(), offset.U32() + 1);
|
binding.U32(), offset.U32() + 1);
|
||||||
ctx.Add(R"(
|
ctx.Add("for(int i=0;i<2;++i){{ "
|
||||||
for(int i=0;i<2;++i){{
|
"ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}",
|
||||||
ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));
|
|
||||||
}}
|
|
||||||
)",
|
|
||||||
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,12 +159,9 @@ void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
||||||
// LOG_WARNING(..., "Op falling to non-atomic");
|
// LOG_WARNING(..., "Op falling to non-atomic");
|
||||||
ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(),
|
||||||
binding.U32(), offset.U32() + 1);
|
binding.U32(), offset.U32() + 1);
|
||||||
ctx.Add(R"(
|
ctx.Add(
|
||||||
for(int i=0;i<2;++i){{
|
"for(int i=0;i<2;++i){{ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}",
|
||||||
ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);
|
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
||||||
}}
|
|
||||||
)",
|
|
||||||
binding.U32(), offset.U32(), binding.U32(), offset.U32(), value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
@ -202,45 +197,33 @@ void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value&
|
||||||
CasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
|
CasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicAddF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
const IR::Value& offset, std::string_view value) {
|
||||||
[[maybe_unused]] const IR::Value& offset,
|
CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatAdd16x2");
|
||||||
[[maybe_unused]] std::string_view value) {
|
|
||||||
throw NotImplementedException("GLSL Instrucion");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
const IR::Value& offset, std::string_view value) {
|
||||||
[[maybe_unused]] const IR::Value& offset,
|
|
||||||
[[maybe_unused]] std::string_view value) {
|
|
||||||
CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatAdd32x2");
|
CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatAdd32x2");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicMinF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
const IR::Value& offset, std::string_view value) {
|
||||||
[[maybe_unused]] const IR::Value& offset,
|
CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMin16x2");
|
||||||
[[maybe_unused]] std::string_view value) {
|
|
||||||
throw NotImplementedException("GLSL Instrucion");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
const IR::Value& offset, std::string_view value) {
|
||||||
[[maybe_unused]] const IR::Value& offset,
|
|
||||||
[[maybe_unused]] std::string_view value) {
|
|
||||||
CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMin32x2");
|
CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMin32x2");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicMaxF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
const IR::Value& offset, std::string_view value) {
|
||||||
[[maybe_unused]] const IR::Value& offset,
|
CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMax16x2");
|
||||||
[[maybe_unused]] std::string_view value) {
|
|
||||||
throw NotImplementedException("GLSL Instrucion");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
[[maybe_unused]] const IR::Value& binding,
|
const IR::Value& offset, std::string_view value) {
|
||||||
[[maybe_unused]] const IR::Value& offset,
|
|
||||||
[[maybe_unused]] std::string_view value) {
|
|
||||||
CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMax32x2");
|
CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMax32x2");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -62,13 +62,12 @@ void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value
|
||||||
ctx.AddU32x2("{}=unpackUint2x32({});", inst, value);
|
ctx.AddU32x2("{}=unpackUint2x32({});", inst, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) {
|
void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
throw NotImplementedException("GLSL Instruction");
|
ctx.AddU32("{}=packFloat2x16({});", inst, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx,
|
void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
[[maybe_unused]] std::string_view value) {
|
ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value);
|
||||||
throw NotImplementedException("GLSL Instruction");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
|
||||||
|
|
|
@ -224,8 +224,8 @@ void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value)
|
||||||
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitPackFloat2x16(EmitContext& ctx, std::string_view value);
|
void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value);
|
void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
|
||||||
|
|
|
@ -113,6 +113,8 @@ std::string RegAlloc::GetType(Type type, u32 index) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case Type::U1:
|
case Type::U1:
|
||||||
return "bool ";
|
return "bool ";
|
||||||
|
case Type::F16x2:
|
||||||
|
return "f16vec2 ";
|
||||||
case Type::U32:
|
case Type::U32:
|
||||||
return "uint ";
|
return "uint ";
|
||||||
case Type::S32:
|
case Type::S32:
|
||||||
|
|
|
@ -18,6 +18,7 @@ enum class Type;
|
||||||
namespace Shader::Backend::GLSL {
|
namespace Shader::Backend::GLSL {
|
||||||
enum class Type : u32 {
|
enum class Type : u32 {
|
||||||
U1,
|
U1,
|
||||||
|
F16x2,
|
||||||
S32,
|
S32,
|
||||||
U32,
|
U32,
|
||||||
F32,
|
F32,
|
||||||
|
|
Loading…
Reference in a new issue