1
0
Fork 0
forked from suyu/suyu

Merge pull request #2163 from ReinUsesLisp/bitset-dirty

maxwell_3d: Use std::bitset to manage dirty flags
This commit is contained in:
bunnei 2019-02-27 20:50:08 -05:00 committed by GitHub
commit f15e2dd881
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 51 additions and 52 deletions

View file

@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = system.GetGPUDebugContext(); auto debug_context = system.GetGPUDebugContext();
const u32 method = method_call.method;
// It is an error to write to a register other than the current macro's ARG register before it // It is an error to write to a register other than the current macro's ARG register before it
// has finished execution. // has finished execution.
if (executing_macro != 0) { if (executing_macro != 0) {
ASSERT(method_call.method == executing_macro + 1); ASSERT(method == executing_macro + 1);
} }
// Methods after 0xE00 are special, they're actually triggers for some microcode that was // Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization. // uploaded to the GPU during initialization.
if (method_call.method >= MacroRegistersStart) { if (method >= MacroRegistersStart) {
// We're trying to execute a macro // We're trying to execute a macro
if (executing_macro == 0) { if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument. // A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method_call.method % 2) == 0, ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register"); "Can't start macro execution by writing to the ARGS register");
executing_macro = method_call.method; executing_macro = method;
} }
macro_params.push_back(method_call.argument); macro_params.push_back(method_call.argument);
@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
return; return;
} }
ASSERT_MSG(method_call.method < Regs::NUM_REGS, ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure"); "Invalid Maxwell3D register, increase the size of the Regs structure");
if (debug_context) { if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
} }
if (regs.reg_array[method_call.method] != method_call.argument) { if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method_call.method] = method_call.argument; regs.reg_array[method] = method_call.argument;
// Color buffers // Color buffers
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
if (method_call.method >= first_rt_reg && if (method >= first_rt_reg &&
method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); dirty_flags.color_buffer.set(rt_index);
} }
// Zeta buffer // Zeta buffer
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || method == MAXWELL3D_REG_INDEX(zeta_width) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || method == MAXWELL3D_REG_INDEX(zeta_height) ||
(method_call.method >= MAXWELL3D_REG_INDEX(zeta) && (method >= MAXWELL3D_REG_INDEX(zeta) &&
method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
dirty_flags.zeta_buffer = true; dirty_flags.zeta_buffer = true;
} }
// Shader // Shader
constexpr u32 shader_registers_count = constexpr u32 shader_registers_count =
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
dirty_flags.shaders = true; dirty_flags.shaders = true;
} }
// Vertex format // Vertex format
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method_call.method < method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
dirty_flags.vertex_attrib_format = true; dirty_flags.vertex_attrib_format = true;
} }
// Vertex buffer // Vertex buffer
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array |= dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
} else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
dirty_flags.vertex_array |= } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
} else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array |=
1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
} }
} }
switch (method_call.method) { switch (method) {
case MAXWELL3D_REG_INDEX(macros.data): { case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(method_call.argument); ProcessMacroUpload(method_call.argument);
break; break;

View file

@ -5,8 +5,10 @@
#pragma once #pragma once
#include <array> #include <array>
#include <bitset>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "common/assert.h" #include "common/assert.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
@ -1094,19 +1096,18 @@ public:
MemoryManager& memory_manager; MemoryManager& memory_manager;
struct DirtyFlags { struct DirtyFlags {
u8 color_buffer = 0xFF; std::bitset<8> color_buffer{0xFF};
bool zeta_buffer = true; std::bitset<32> vertex_array{0xFFFFFFFF};
bool shaders = true;
bool vertex_attrib_format = true; bool vertex_attrib_format = true;
u32 vertex_array = 0xFFFFFFFF; bool zeta_buffer = true;
bool shaders = true;
void OnMemoryWrite() { void OnMemoryWrite() {
color_buffer = 0xFF;
zeta_buffer = true; zeta_buffer = true;
shaders = true; shaders = true;
vertex_array = 0xFFFFFFFF; color_buffer.set();
vertex_array.set();
} }
}; };

View file

@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
} }
// Rebinding the VAO invalidates the vertex buffer bindings. // Rebinding the VAO invalidates the vertex buffer bindings.
gpu.dirty_flags.vertex_array = 0xFFFFFFFF; gpu.dirty_flags.vertex_array.set();
state.draw.vertex_array = vao_entry.handle; state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle; return vao_entry.handle;
@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
if (!gpu.dirty_flags.vertex_array) if (gpu.dirty_flags.vertex_array.none())
return; return;
MICROPROFILE_SCOPE(OpenGL_VB); MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer // Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (~gpu.dirty_flags.vertex_array & (1u << index)) if (!gpu.dirty_flags.vertex_array[index])
continue; continue;
const auto& vertex_array = regs.vertex_array[index]; const auto& vertex_array = regs.vertex_array[index];
@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
} }
} }
gpu.dirty_flags.vertex_array = 0; gpu.dirty_flags.vertex_array.reset();
} }
DrawParameters RasterizerOpenGL::SetupDraw() { DrawParameters RasterizerOpenGL::SetupDraw() {
@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) { std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer); MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target}; single_color_target};
if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && if (fb_config_state == current_framebuffer_config_state &&
!gpu.dirty_flags.zeta_buffer) { gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the // single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments // host framebuffer may contain different attachments
@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for at least 18 constant buffers // Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
bool invalidate = buffer_cache.Map(buffer_size); const bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) { if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state. // As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty_flags.vertex_array = 0xFFFFFFFF; gpu.dirty_flags.vertex_array.set();
} }
const GLuint vao = SetupVertexFormat(); const GLuint vao = SetupVertexFormat();

View file

@ -962,10 +962,10 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs}; const auto& regs{gpu.regs};
if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { if (!gpu.dirty_flags.color_buffer[index]) {
return last_color_buffers[index]; return last_color_buffers[index];
} }
gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); gpu.dirty_flags.color_buffer.reset(index);
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);