shader: Optimize NVN Fallthrough
This commit is contained in:
parent
153a77efee
commit
ee61ec2c39
4 changed files with 83 additions and 9 deletions
|
@ -840,6 +840,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
|
||||||
AddLabel();
|
AddLabel();
|
||||||
const size_t num_buffers{info.storage_buffers_descriptors.size()};
|
const size_t num_buffers{info.storage_buffers_descriptors.size()};
|
||||||
for (size_t index = 0; index < num_buffers; ++index) {
|
for (size_t index = 0; index < num_buffers; ++index) {
|
||||||
|
if (!info.nvn_buffer_used[index]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const auto& ssbo{info.storage_buffers_descriptors[index]};
|
const auto& ssbo{info.storage_buffers_descriptors[index]};
|
||||||
const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
|
const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
|
||||||
const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
|
const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
|
||||||
|
|
|
@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) {
|
||||||
}()};
|
}()};
|
||||||
auto& descs{program.info.storage_buffers_descriptors};
|
auto& descs{program.info.storage_buffers_descriptors};
|
||||||
for (u32 index = 0; index < num_buffers; ++index) {
|
for (u32 index = 0; index < num_buffers; ++index) {
|
||||||
|
if (!program.info.nvn_buffer_used[index]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const u32 offset{base + index * descriptor_size};
|
const u32 offset{base + index * descriptor_size};
|
||||||
const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
|
const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
|
||||||
if (it != descs.end()) {
|
if (it != descs.end()) {
|
||||||
|
it->is_written |= program.info.stores_global_memory;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Assume these are written for now
|
|
||||||
descs.push_back({
|
descs.push_back({
|
||||||
.cbuf_index = driver_cbuf,
|
.cbuf_index = driver_cbuf,
|
||||||
.cbuf_offset = offset,
|
.cbuf_offset = offset,
|
||||||
.count = 1,
|
.count = 1,
|
||||||
.is_written = true,
|
.is_written = program.info.stores_global_memory,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Marks the NVN storage buffer descriptor slots that a constant buffer read may reference.
///
/// Arg(0) of the instruction is read as the constant buffer index and Arg(1) as the offset
/// into that buffer:
/// - Index is an immediate other than 0: nothing is recorded.
/// - Index or offset is not an immediate: every bit of info.nvn_buffer_used is set.
/// - Otherwise the bit of the 0x10-byte slot containing the offset is set, provided the
///   offset falls inside the 16 slots that start at info.nvn_buffer_base.
void CheckCBufNVN(Info& info, IR::Inst& inst) {
    const IR::Value binding{inst.Arg(0)};
    if (binding.IsImmediate()) {
        if (binding.U32() != 0) {
            // Only constant buffer 0 is inspected
            return;
        }
        const IR::Value location{inst.Arg(1)};
        if (location.IsImmediate()) {
            const u32 descriptor_size{0x10};
            const u32 first{info.nvn_buffer_base};
            const u32 last{first + descriptor_size * 16};
            const u32 offset{location.U32()};
            if (offset < first || offset >= last) {
                // Read lands outside the tracked descriptor window
                return;
            }
            const std::size_t slot{(offset - first) / descriptor_size};
            info.nvn_buffer_used.set(slot, true);
            return;
        }
    }
    // Index or offset is not known at this point: flag every slot as used
    info.nvn_buffer_used.set();
}
|
||||||
|
|
||||||
void VisitUsages(Info& info, IR::Inst& inst) {
|
void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::CompositeConstructF16x2:
|
case IR::Opcode::CompositeConstructF16x2:
|
||||||
|
@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::LoadGlobalU8:
|
|
||||||
case IR::Opcode::LoadGlobalS8:
|
|
||||||
case IR::Opcode::LoadGlobalU16:
|
|
||||||
case IR::Opcode::LoadGlobalS16:
|
|
||||||
case IR::Opcode::LoadGlobal32:
|
|
||||||
case IR::Opcode::LoadGlobal64:
|
|
||||||
case IR::Opcode::LoadGlobal128:
|
|
||||||
case IR::Opcode::WriteGlobalU8:
|
case IR::Opcode::WriteGlobalU8:
|
||||||
case IR::Opcode::WriteGlobalS8:
|
case IR::Opcode::WriteGlobalS8:
|
||||||
case IR::Opcode::WriteGlobalU16:
|
case IR::Opcode::WriteGlobalU16:
|
||||||
|
@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
case IR::Opcode::GlobalAtomicMinF32x2:
|
case IR::Opcode::GlobalAtomicMinF32x2:
|
||||||
case IR::Opcode::GlobalAtomicMaxF16x2:
|
case IR::Opcode::GlobalAtomicMaxF16x2:
|
||||||
case IR::Opcode::GlobalAtomicMaxF32x2:
|
case IR::Opcode::GlobalAtomicMaxF32x2:
|
||||||
|
info.stores_global_memory = true;
|
||||||
|
[[fallthrough]];
|
||||||
|
case IR::Opcode::LoadGlobalU8:
|
||||||
|
case IR::Opcode::LoadGlobalS8:
|
||||||
|
case IR::Opcode::LoadGlobalU16:
|
||||||
|
case IR::Opcode::LoadGlobalS16:
|
||||||
|
case IR::Opcode::LoadGlobal32:
|
||||||
|
case IR::Opcode::LoadGlobal64:
|
||||||
|
case IR::Opcode::LoadGlobal128:
|
||||||
info.uses_int64 = true;
|
info.uses_int64 = true;
|
||||||
info.uses_global_memory = true;
|
info.uses_global_memory = true;
|
||||||
info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
|
info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
|
||||||
|
@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards constant buffer load instructions (the GetCbuf* opcodes listed below) to
/// CheckCBufNVN; instructions with any other opcode are left untouched.
void VisitCbufs(Info& info, IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};
    const bool is_cbuf_load{
        opcode == IR::Opcode::GetCbufU8 || opcode == IR::Opcode::GetCbufS8 ||
        opcode == IR::Opcode::GetCbufU16 || opcode == IR::Opcode::GetCbufS16 ||
        opcode == IR::Opcode::GetCbufU32 || opcode == IR::Opcode::GetCbufF32 ||
        opcode == IR::Opcode::GetCbufU32x2};
    if (is_cbuf_load) {
        CheckCBufNVN(info, inst);
    }
}
|
||||||
|
|
||||||
// Per-instruction entry point of the info collection pass: runs VisitUsages, then
// VisitFpModifiers, and (on the new side of this diff) VisitCbufs on the same instruction.
void Visit(Info& info, IR::Inst& inst) {
|
void Visit(Info& info, IR::Inst& inst) {
|
||||||
VisitUsages(info, inst);
|
VisitUsages(info, inst);
|
||||||
VisitFpModifiers(info, inst);
|
VisitFpModifiers(info, inst);
|
||||||
|
VisitCbufs(info, inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GatherInfoFromHeader(Environment& env, Info& info) {
|
void GatherInfoFromHeader(Environment& env, Info& info) {
|
||||||
|
@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
|
||||||
|
|
||||||
void CollectShaderInfoPass(Environment& env, IR::Program& program) {
|
void CollectShaderInfoPass(Environment& env, IR::Program& program) {
|
||||||
Info& info{program.info};
|
Info& info{program.info};
|
||||||
|
const u32 base{[&] {
|
||||||
|
switch (program.stage) {
|
||||||
|
case Stage::VertexA:
|
||||||
|
case Stage::VertexB:
|
||||||
|
return 0x110u;
|
||||||
|
case Stage::TessellationControl:
|
||||||
|
return 0x210u;
|
||||||
|
case Stage::TessellationEval:
|
||||||
|
return 0x310u;
|
||||||
|
case Stage::Geometry:
|
||||||
|
return 0x410u;
|
||||||
|
case Stage::Fragment:
|
||||||
|
return 0x510u;
|
||||||
|
case Stage::Compute:
|
||||||
|
return 0x310u;
|
||||||
|
}
|
||||||
|
throw InvalidArgument("Invalid stage {}", program.stage);
|
||||||
|
}()};
|
||||||
|
info.nvn_buffer_base = base;
|
||||||
|
|
||||||
for (IR::Block* const block : program.post_order_blocks) {
|
for (IR::Block* const block : program.post_order_blocks) {
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
Visit(info, inst);
|
Visit(info, inst);
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <bitset>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "shader_recompiler/frontend/ir/type.h"
|
#include "shader_recompiler/frontend/ir/type.h"
|
||||||
|
@ -140,6 +141,7 @@ struct Info {
|
||||||
bool stores_tess_level_outer{};
|
bool stores_tess_level_outer{};
|
||||||
bool stores_tess_level_inner{};
|
bool stores_tess_level_inner{};
|
||||||
bool stores_indexed_attributes{};
|
bool stores_indexed_attributes{};
|
||||||
|
bool stores_global_memory{};
|
||||||
|
|
||||||
bool uses_fp16{};
|
bool uses_fp16{};
|
||||||
bool uses_fp64{};
|
bool uses_fp64{};
|
||||||
|
@ -180,6 +182,8 @@ struct Info {
|
||||||
IR::Type used_storage_buffer_types{};
|
IR::Type used_storage_buffer_types{};
|
||||||
|
|
||||||
u32 constant_buffer_mask{};
|
u32 constant_buffer_mask{};
|
||||||
|
u32 nvn_buffer_base{};
|
||||||
|
std::bitset<16> nvn_buffer_used{};
|
||||||
|
|
||||||
boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
|
boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
|
||||||
constant_buffer_descriptors;
|
constant_buffer_descriptors;
|
||||||
|
|
Loading…
Reference in a new issue