forked from suyu/suyu
Merge pull request #8133 from liamwhite/gl-spv-cbuf
shader_recompiler: support const buffer indirect addressing on OpenGL
This commit is contained in:
commit
4d5900aaa1
6 changed files with 51 additions and 25 deletions
|
@ -35,6 +35,15 @@ std::string_view OutputVertexIndex(EmitContext& ctx) {
|
||||||
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
|
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the GLSL expression that reads element `index` of a constant buffer.
///
/// An immediate binding is formatted as a direct access on "<stage_name>_cbuf<binding>".
/// Any other binding is consumed from the variable allocator and the access is routed
/// through a call to GetCbufIndirect(binding, index).
std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
    if (!binding.IsImmediate()) {
        // The binding is not a compile-time constant: let the helper pick the buffer.
        const auto runtime_binding{ctx.var_alloc.Consume(binding)};
        return fmt::format("GetCbufIndirect({},{})", runtime_binding, index);
    }
    const auto slot{binding.U32()};
    return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, slot, index);
}
|
||||||
|
|
||||||
void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
|
void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
|
||||||
const IR::Value& offset, u32 num_bits, std::string_view cast = {},
|
const IR::Value& offset, u32 num_bits, std::string_view cast = {},
|
||||||
std::string_view bit_offset = {}) {
|
std::string_view bit_offset = {}) {
|
||||||
|
@ -55,8 +64,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
|
||||||
const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
|
const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
|
||||||
: fmt::format("[({}>>2)%4]", offset_var)};
|
: fmt::format("[({}>>2)%4]", offset_var)};
|
||||||
|
|
||||||
const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
|
const auto cbuf{ChooseCbuf(ctx, binding, index)};
|
||||||
const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
|
const auto cbuf_cast{fmt::format("{}({}{{}})", cast, cbuf)};
|
||||||
const auto extraction{num_bits == 32 ? cbuf_cast
|
const auto extraction{num_bits == 32 ? cbuf_cast
|
||||||
: fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
|
: fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
|
||||||
bit_offset, num_bits)};
|
bit_offset, num_bits)};
|
||||||
|
@ -140,9 +149,9 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
|
|
||||||
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
|
|
||||||
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
|
||||||
if (offset.IsImmediate()) {
|
if (offset.IsImmediate()) {
|
||||||
|
const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
|
||||||
static constexpr u32 cbuf_size{0x10000};
|
static constexpr u32 cbuf_size{0x10000};
|
||||||
const u32 u32_offset{offset.U32()};
|
const u32 u32_offset{offset.U32()};
|
||||||
const s32 signed_offset{static_cast<s32>(offset.U32())};
|
const s32 signed_offset{static_cast<s32>(offset.U32())};
|
||||||
|
@ -162,17 +171,17 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto offset_var{ctx.var_alloc.Consume(offset)};
|
const auto offset_var{ctx.var_alloc.Consume(offset)};
|
||||||
|
const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))};
|
||||||
if (!ctx.profile.has_gl_component_indexing_bug) {
|
if (!ctx.profile.has_gl_component_indexing_bug) {
|
||||||
ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
|
ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf,
|
||||||
cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
|
offset_var, cast, cbuf, offset_var);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
|
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
|
||||||
const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
|
const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
|
||||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||||
ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
|
ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast,
|
||||||
swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
|
cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]);
|
||||||
"xyzw"[(swizzle + 1) % 4]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -359,6 +359,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
||||||
header += "layout(location=0) uniform vec4 scaling;";
|
header += "layout(location=0) uniform vec4 scaling;";
|
||||||
}
|
}
|
||||||
DefineConstantBuffers(bindings);
|
DefineConstantBuffers(bindings);
|
||||||
|
DefineConstantBufferIndirect();
|
||||||
DefineStorageBuffers(bindings);
|
DefineStorageBuffers(bindings);
|
||||||
SetupImages(bindings);
|
SetupImages(bindings);
|
||||||
SetupTextures(bindings);
|
SetupTextures(bindings);
|
||||||
|
@ -436,6 +437,24 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitContext::DefineConstantBufferIndirect() {
|
||||||
|
if (!info.uses_cbuf_indirect) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
header += profile.has_gl_cbuf_ftou_bug ? "uvec4 " : "vec4 ";
|
||||||
|
header += "GetCbufIndirect(uint binding, uint offset){"
|
||||||
|
"switch(binding){"
|
||||||
|
"default:";
|
||||||
|
|
||||||
|
for (const auto& desc : info.constant_buffer_descriptors) {
|
||||||
|
header +=
|
||||||
|
fmt::format("case {}:return {}_cbuf{}[offset];", desc.index, stage_name, desc.index);
|
||||||
|
}
|
||||||
|
|
||||||
|
header += "}}";
|
||||||
|
}
|
||||||
|
|
||||||
void EmitContext::DefineStorageBuffers(Bindings& bindings) {
|
void EmitContext::DefineStorageBuffers(Bindings& bindings) {
|
||||||
if (info.storage_buffers_descriptors.empty()) {
|
if (info.storage_buffers_descriptors.empty()) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -162,6 +162,7 @@ public:
|
||||||
private:
|
private:
|
||||||
void SetupExtensions();
|
void SetupExtensions();
|
||||||
void DefineConstantBuffers(Bindings& bindings);
|
void DefineConstantBuffers(Bindings& bindings);
|
||||||
|
void DefineConstantBufferIndirect();
|
||||||
void DefineStorageBuffers(Bindings& bindings);
|
void DefineStorageBuffers(Bindings& bindings);
|
||||||
void DefineGenericOutput(size_t index, u32 invocations);
|
void DefineGenericOutput(size_t index, u32 invocations);
|
||||||
void DefineHelperFunctions();
|
void DefineHelperFunctions();
|
||||||
|
|
|
@ -1043,15 +1043,15 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
|
||||||
const Id merge_label{OpLabel()};
|
const Id merge_label{OpLabel()};
|
||||||
const Id uniform_type{uniform_types.*member_ptr};
|
const Id uniform_type{uniform_types.*member_ptr};
|
||||||
|
|
||||||
std::array<Id, Info::MAX_CBUFS> buf_labels;
|
std::array<Id, Info::MAX_INDIRECT_CBUFS> buf_labels;
|
||||||
std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals;
|
std::array<Sirit::Literal, Info::MAX_INDIRECT_CBUFS> buf_literals;
|
||||||
for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
|
for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
|
||||||
buf_labels[i] = OpLabel();
|
buf_labels[i] = OpLabel();
|
||||||
buf_literals[i] = Sirit::Literal{i};
|
buf_literals[i] = Sirit::Literal{i};
|
||||||
}
|
}
|
||||||
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
|
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
|
||||||
OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
|
OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
|
||||||
for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
|
for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
|
||||||
AddLabel(buf_labels[i]);
|
AddLabel(buf_labels[i]);
|
||||||
const Id cbuf{cbufs[i].*member_ptr};
|
const Id cbuf{cbufs[i].*member_ptr};
|
||||||
const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
|
const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
|
||||||
|
@ -1064,22 +1064,23 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
|
||||||
return func;
|
return func;
|
||||||
}};
|
}};
|
||||||
IR::Type types{info.used_indirect_cbuf_types};
|
IR::Type types{info.used_indirect_cbuf_types};
|
||||||
if (True(types & IR::Type::U8)) {
|
bool supports_aliasing = profile.support_descriptor_aliasing;
|
||||||
|
if (supports_aliasing && True(types & IR::Type::U8)) {
|
||||||
load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
|
load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
|
||||||
}
|
}
|
||||||
if (True(types & IR::Type::U16)) {
|
if (supports_aliasing && True(types & IR::Type::U16)) {
|
||||||
load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
|
load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
|
||||||
}
|
}
|
||||||
if (True(types & IR::Type::F32)) {
|
if (supports_aliasing && True(types & IR::Type::F32)) {
|
||||||
load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
|
load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
|
||||||
}
|
}
|
||||||
if (True(types & IR::Type::U32)) {
|
if (supports_aliasing && True(types & IR::Type::U32)) {
|
||||||
load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
|
load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
|
||||||
}
|
}
|
||||||
if (True(types & IR::Type::U32x2)) {
|
if (supports_aliasing && True(types & IR::Type::U32x2)) {
|
||||||
load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
|
load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
|
||||||
}
|
}
|
||||||
if (True(types & IR::Type::U32x4)) {
|
if (!supports_aliasing || True(types & IR::Type::U32x4)) {
|
||||||
load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
|
load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,13 +32,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
|
||||||
void AddRegisterIndexedLdc(Info& info) {
|
void AddRegisterIndexedLdc(Info& info) {
|
||||||
info.uses_cbuf_indirect = true;
|
info.uses_cbuf_indirect = true;
|
||||||
|
|
||||||
// The shader can use any possible constant buffer
|
for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
|
||||||
info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1;
|
AddConstantBufferDescriptor(info, i, 1);
|
||||||
|
|
||||||
auto& cbufs{info.constant_buffer_descriptors};
|
|
||||||
cbufs.clear();
|
|
||||||
for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
|
|
||||||
cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1});
|
|
||||||
|
|
||||||
// The shader can use any possible access size
|
// The shader can use any possible access size
|
||||||
info.constant_buffer_used_sizes[i] = 0x10'000;
|
info.constant_buffer_used_sizes[i] = 0x10'000;
|
||||||
|
|
|
@ -105,6 +105,7 @@ struct ImageDescriptor {
|
||||||
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
|
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
|
||||||
|
|
||||||
struct Info {
|
struct Info {
|
||||||
|
static constexpr size_t MAX_INDIRECT_CBUFS{14};
|
||||||
static constexpr size_t MAX_CBUFS{18};
|
static constexpr size_t MAX_CBUFS{18};
|
||||||
static constexpr size_t MAX_SSBOS{32};
|
static constexpr size_t MAX_SSBOS{32};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue