1
0
Fork 0
forked from suyu/suyu

gl_shader_cache: Specialize local memory size for compute shaders

Local memory size in compute shaders was stubbed with an arbitrary size.
This commit specializes local memory size from guest GPU parameters.
This commit is contained in:
ReinUsesLisp 2019-11-13 00:25:52 -03:00
parent dbeb523879
commit 287ae2b9e8
No known key found for this signature in database
GPG key ID: 2DFC508897B39CFE
6 changed files with 32 additions and 21 deletions

View file

@ -178,7 +178,12 @@ public:
BitField<24, 5, u32> gpr_alloc;
};
INSERT_PADDING_WORDS(0x11);
union {
BitField<0, 20, u32> local_crs_alloc;
BitField<24, 5, u32> sass_version;
};
INSERT_PADDING_WORDS(0x10);
} launch_description{};
struct {

View file

@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
launch_desc.block_dim_z, launch_desc.shared_alloc);
launch_desc.block_dim_z, launch_desc.shared_alloc,
launch_desc.local_pos_alloc);
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
state.draw.program_pipeline = 0;

View file

@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
source += fmt::format("shared uint smem[{}];",
Common::AlignUp(variant.shared_memory_size, 4) / 4);
}
if (variant.local_memory_size > 0) {
source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
Common::AlignUp(variant.local_memory_size, 4) / 4);
}
}
source += '\n';

View file

@ -510,10 +510,14 @@ private:
}
void DeclareLocalMemory() {
// TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
// specialization time.
const u64 local_memory_size =
stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
if (stage == ProgramType::Compute) {
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
code.AddLine("#endif");
return;
}
const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
@ -851,9 +855,6 @@ private:
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
return {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
@ -1228,9 +1229,6 @@ private:
}
target = std::move(*output);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
target = {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};

View file

@ -52,11 +52,11 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{};
};
constexpr u32 NativeVersion = 8;
constexpr u32 NativeVersion = 9;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 16);
static_assert(sizeof(ProgramVariant) == 32);
static_assert(sizeof(ProgramVariant) == 36);
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};

View file

@ -64,10 +64,10 @@ struct ProgramVariant final {
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
/// Compute constructor.
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
u32 shared_memory_size) noexcept
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
u32 local_memory_size) noexcept
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
shared_memory_size{shared_memory_size} {}
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
// Graphics specific parameters.
BaseBindings base_bindings{};
@ -78,12 +78,13 @@ struct ProgramVariant final {
u16 block_y{};
u16 block_z{};
u32 shared_memory_size{};
u32 local_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept {
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
rhs.block_x, rhs.block_y, rhs.block_z,
rhs.shared_memory_size);
shared_memory_size, local_memory_size) ==
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
}
bool operator!=(const ProgramVariant& rhs) const noexcept {
@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> {
static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^
(static_cast<std::size_t>(variant.block_z) << 48) ^
(static_cast<std::size_t>(variant.shared_memory_size) << 16);
(static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
(static_cast<std::size_t>(variant.local_memory_size) << 36);
}
};