shader: Optimize NVN Fallthrough
This commit is contained in:
parent
153a77efee
commit
ee61ec2c39
4 changed files with 83 additions and 9 deletions
|
@ -840,6 +840,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
|
|||
AddLabel();
|
||||
const size_t num_buffers{info.storage_buffers_descriptors.size()};
|
||||
for (size_t index = 0; index < num_buffers; ++index) {
|
||||
if (!info.nvn_buffer_used[index]) {
|
||||
continue;
|
||||
}
|
||||
const auto& ssbo{info.storage_buffers_descriptors[index]};
|
||||
const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
|
||||
const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
|
||||
|
|
|
@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) {
|
|||
}()};
|
||||
auto& descs{program.info.storage_buffers_descriptors};
|
||||
for (u32 index = 0; index < num_buffers; ++index) {
|
||||
if (!program.info.nvn_buffer_used[index]) {
|
||||
continue;
|
||||
}
|
||||
const u32 offset{base + index * descriptor_size};
|
||||
const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
|
||||
if (it != descs.end()) {
|
||||
it->is_written |= program.info.stores_global_memory;
|
||||
continue;
|
||||
}
|
||||
// Assume these are written for now
|
||||
descs.push_back({
|
||||
.cbuf_index = driver_cbuf,
|
||||
.cbuf_offset = offset,
|
||||
.count = 1,
|
||||
.is_written = true,
|
||||
.is_written = program.info.stores_global_memory,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) {
|
|||
}
|
||||
}
|
||||
|
||||
void CheckCBufNVN(Info& info, IR::Inst& inst) {
|
||||
const IR::Value cbuf_index{inst.Arg(0)};
|
||||
if (!cbuf_index.IsImmediate()) {
|
||||
info.nvn_buffer_used.set();
|
||||
return;
|
||||
}
|
||||
const u32 index{cbuf_index.U32()};
|
||||
if (index != 0) {
|
||||
return;
|
||||
}
|
||||
const IR::Value cbuf_offset{inst.Arg(1)};
|
||||
if (!cbuf_offset.IsImmediate()) {
|
||||
info.nvn_buffer_used.set();
|
||||
return;
|
||||
}
|
||||
const u32 offset{cbuf_offset.U32()};
|
||||
const u32 descriptor_size{0x10};
|
||||
const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
|
||||
if (offset >= info.nvn_buffer_base && offset < upper_limit) {
|
||||
const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
|
||||
info.nvn_buffer_used.set(nvn_index, true);
|
||||
}
|
||||
}
|
||||
|
||||
void VisitUsages(Info& info, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::CompositeConstructF16x2:
|
||||
|
@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
break;
|
||||
}
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadGlobalU8:
|
||||
case IR::Opcode::LoadGlobalS8:
|
||||
case IR::Opcode::LoadGlobalU16:
|
||||
case IR::Opcode::LoadGlobalS16:
|
||||
case IR::Opcode::LoadGlobal32:
|
||||
case IR::Opcode::LoadGlobal64:
|
||||
case IR::Opcode::LoadGlobal128:
|
||||
case IR::Opcode::WriteGlobalU8:
|
||||
case IR::Opcode::WriteGlobalS8:
|
||||
case IR::Opcode::WriteGlobalU16:
|
||||
|
@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::GlobalAtomicMinF32x2:
|
||||
case IR::Opcode::GlobalAtomicMaxF16x2:
|
||||
case IR::Opcode::GlobalAtomicMaxF32x2:
|
||||
info.stores_global_memory = true;
|
||||
[[fallthrough]];
|
||||
case IR::Opcode::LoadGlobalU8:
|
||||
case IR::Opcode::LoadGlobalS8:
|
||||
case IR::Opcode::LoadGlobalU16:
|
||||
case IR::Opcode::LoadGlobalS16:
|
||||
case IR::Opcode::LoadGlobal32:
|
||||
case IR::Opcode::LoadGlobal64:
|
||||
case IR::Opcode::LoadGlobal128:
|
||||
info.uses_int64 = true;
|
||||
info.uses_global_memory = true;
|
||||
info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
|
||||
|
@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void VisitCbufs(Info& info, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::GetCbufU8:
|
||||
case IR::Opcode::GetCbufS8:
|
||||
case IR::Opcode::GetCbufU16:
|
||||
case IR::Opcode::GetCbufS16:
|
||||
case IR::Opcode::GetCbufU32:
|
||||
case IR::Opcode::GetCbufF32:
|
||||
case IR::Opcode::GetCbufU32x2: {
|
||||
CheckCBufNVN(info, inst);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Visit(Info& info, IR::Inst& inst) {
|
||||
VisitUsages(info, inst);
|
||||
VisitFpModifiers(info, inst);
|
||||
VisitCbufs(info, inst);
|
||||
}
|
||||
|
||||
void GatherInfoFromHeader(Environment& env, Info& info) {
|
||||
|
@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
|
|||
|
||||
void CollectShaderInfoPass(Environment& env, IR::Program& program) {
|
||||
Info& info{program.info};
|
||||
const u32 base{[&] {
|
||||
switch (program.stage) {
|
||||
case Stage::VertexA:
|
||||
case Stage::VertexB:
|
||||
return 0x110u;
|
||||
case Stage::TessellationControl:
|
||||
return 0x210u;
|
||||
case Stage::TessellationEval:
|
||||
return 0x310u;
|
||||
case Stage::Geometry:
|
||||
return 0x410u;
|
||||
case Stage::Fragment:
|
||||
return 0x510u;
|
||||
case Stage::Compute:
|
||||
return 0x310u;
|
||||
}
|
||||
throw InvalidArgument("Invalid stage {}", program.stage);
|
||||
}()};
|
||||
info.nvn_buffer_base = base;
|
||||
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
Visit(info, inst);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/type.h"
|
||||
|
@ -140,6 +141,7 @@ struct Info {
|
|||
bool stores_tess_level_outer{};
|
||||
bool stores_tess_level_inner{};
|
||||
bool stores_indexed_attributes{};
|
||||
bool stores_global_memory{};
|
||||
|
||||
bool uses_fp16{};
|
||||
bool uses_fp64{};
|
||||
|
@ -180,6 +182,8 @@ struct Info {
|
|||
IR::Type used_storage_buffer_types{};
|
||||
|
||||
u32 constant_buffer_mask{};
|
||||
u32 nvn_buffer_base{};
|
||||
std::bitset<16> nvn_buffer_used{};
|
||||
|
||||
boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
|
||||
constant_buffer_descriptors;
|
||||
|
|
Loading…
Reference in a new issue