forked from suyu/suyu
gl_arb_decompiler: Implement robust buffer operations
This emulates the behavior we get on GLSL with regular SSBOs with a pointer + length pair. It aims to be consistent with the crashes we might get. Out of bounds stores are ignored. Atomics are ignored and return zero. Reads return zero.
This commit is contained in:
parent
e7a26ecec5
commit
f21a189148
3 changed files with 54 additions and 33 deletions
|
@ -376,9 +376,11 @@ private:
|
|||
std::string temporary = AllocTemporary();
|
||||
std::string address;
|
||||
std::string_view opname;
|
||||
bool robust = false;
|
||||
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||
address = GlobalMemoryPointer(*gmem);
|
||||
opname = "ATOM";
|
||||
robust = true;
|
||||
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
||||
opname = "ATOMS";
|
||||
|
@ -386,7 +388,15 @@ private:
|
|||
UNREACHABLE();
|
||||
return "{0, 0, 0, 0}";
|
||||
}
|
||||
if (robust) {
|
||||
AddLine("IF NE.x;");
|
||||
}
|
||||
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
|
||||
if (robust) {
|
||||
AddLine("ELSE;");
|
||||
AddLine("MOV.S {}, 0;", temporary);
|
||||
AddLine("ENDIF;");
|
||||
}
|
||||
return temporary;
|
||||
}
|
||||
|
||||
|
@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
|
|||
}
|
||||
|
||||
void ARBDecompiler::DeclareGlobalMemory() {
|
||||
const std::size_t num_entries = ir.GetGlobalMemory().size();
|
||||
const size_t num_entries = ir.GetGlobalMemory().size();
|
||||
if (num_entries > 0) {
|
||||
const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
|
||||
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
|
||||
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
|||
|
||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||
std::string temporary = AllocTemporary();
|
||||
AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
|
||||
AddLine("MOV {}, 0;", temporary);
|
||||
AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
|
||||
return temporary;
|
||||
}
|
||||
|
||||
|
@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
|
|||
}
|
||||
|
||||
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
|
||||
// Read a bindless SSBO, return its address and set CC accordingly
|
||||
// address = c[binding].xy
|
||||
// length = c[binding].z
|
||||
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
|
||||
const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
|
||||
|
||||
const std::string pointer = AllocLongVectorTemporary();
|
||||
std::string temporary = AllocTemporary();
|
||||
|
||||
const u32 local_index = binding / 2;
|
||||
AddLine("PK64.U {}, c[{}];", pointer, local_index);
|
||||
AddLine("PK64.U {}, c[{}];", pointer, binding);
|
||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
|
||||
Visit(gmem.GetBaseAddress()));
|
||||
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
|
||||
AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
|
||||
AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
|
||||
// Compare offset to length and set CC
|
||||
AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
|
||||
return fmt::format("{}.x", pointer);
|
||||
}
|
||||
|
||||
|
@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
|
|||
ResetTemporaries();
|
||||
return {};
|
||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||
AddLine("IF NE.x;");
|
||||
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
|
||||
AddLine("ENDIF;");
|
||||
ResetTemporaries();
|
||||
return {};
|
||||
} else {
|
||||
|
|
|
@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) {
|
|||
(state ? glEnable : glDisable)(cap);
|
||||
}
|
||||
|
||||
void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
|
||||
if (num_entries == 0) {
|
||||
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
|
||||
if (num_ssbos == 0) {
|
||||
return;
|
||||
}
|
||||
if (num_entries % 2 == 1) {
|
||||
pointers[num_entries] = 0;
|
||||
}
|
||||
const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
|
||||
glProgramLocalParametersI4uivNV(target, 0, num_vectors,
|
||||
reinterpret_cast<const GLuint*>(pointers));
|
||||
glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
|
||||
reinterpret_cast<const GLuint*>(ssbos));
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT = {
|
||||
static constexpr std::array PARAMETER_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
|
||||
|
||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
};
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& stages = maxwell3d.state.shader_stages;
|
||||
const auto& shader_stage = stages[stage_index];
|
||||
|
@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
|||
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
||||
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||
|
||||
std::array<GLuint64EXT, 32> pointers;
|
||||
ASSERT(entries.size() < pointers.size());
|
||||
std::array<BindlessSSBO, 32> ssbos;
|
||||
ASSERT(entries.size() < ssbos.size());
|
||||
|
||||
const bool assembly_shaders = device.UseAssemblyShaders();
|
||||
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||
|
@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
|||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||
++binding;
|
||||
}
|
||||
if (assembly_shaders) {
|
||||
UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
|
||||
UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
|||
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
|
||||
const auto& entries{kernel->GetEntries().global_memory_entries};
|
||||
|
||||
std::array<GLuint64EXT, 32> pointers;
|
||||
ASSERT(entries.size() < pointers.size());
|
||||
std::array<BindlessSSBO, 32> ssbos;
|
||||
ASSERT(entries.size() < ssbos.size());
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : entries) {
|
||||
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||
++binding;
|
||||
}
|
||||
if (device.UseAssemblyShaders()) {
|
||||
UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
|
||||
UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||
GPUVAddr gpu_addr, std::size_t size,
|
||||
GLuint64EXT* pointer) {
|
||||
const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
|
||||
GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
|
||||
const size_t alignment{device.GetShaderStorageBufferAlignment()};
|
||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||
if (device.UseAssemblyShaders()) {
|
||||
*pointer = info.address + info.offset;
|
||||
*ssbo = BindlessSSBO{
|
||||
.address = static_cast<GLuint64EXT>(info.address + info.offset),
|
||||
.length = static_cast<GLsizei>(size),
|
||||
.padding = 0,
|
||||
};
|
||||
} else {
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
|
|
|
@ -53,6 +53,13 @@ namespace OpenGL {
|
|||
struct ScreenInfo;
|
||||
struct DrawParameters;
|
||||
|
||||
struct BindlessSSBO {
|
||||
GLuint64EXT address;
|
||||
GLsizei length;
|
||||
GLsizei padding;
|
||||
};
|
||||
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
|
||||
|
||||
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
||||
public:
|
||||
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
|
||||
|
@ -126,7 +133,7 @@ private:
|
|||
|
||||
/// Configures a global memory buffer.
|
||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
std::size_t size, GLuint64EXT* pointer);
|
||||
size_t size, BindlessSSBO* ssbo);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||
|
|
Loading…
Reference in a new issue