gl_rasterizer: Skip VB upload if the state is clean.
This commit is contained in:
parent
0072275d25
commit
97f5c4ffd3
9 changed files with 60 additions and 6 deletions
|
@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
|
|||
void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
|
||||
MICROPROFILE_SCOPE(ProcessCommandLists);
|
||||
|
||||
// On entering GPU code, assume all memory may be touched by the ARM core.
|
||||
maxwell_3d->dirty_flags.OnMemoryWrite();
|
||||
|
||||
auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
|
||||
LOG_TRACE(HW_GPU,
|
||||
"Processing method {:08X} on subchannel {} value "
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
|
@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
|
|||
u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
|
||||
|
||||
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
|
||||
// All copies here update the main memory, so mark all rasterizer states as invalid.
|
||||
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
|
||||
rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||
// cache. We do this before actually writing the new data because the destination address
|
||||
|
|
|
@ -3,8 +3,10 @@
|
|||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
|
|||
rasterizer.InvalidateRegion(dest_address, sizeof(u32));
|
||||
|
||||
Memory::Write32(dest_address, data);
|
||||
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
|
||||
state.write_offset++;
|
||||
}
|
||||
|
|
|
@ -123,10 +123,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
|||
|
||||
if (regs.reg_array[method] != value) {
|
||||
regs.reg_array[method] = value;
|
||||
// Vertex format
|
||||
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
|
||||
dirty_flags.vertex_attrib_format = true;
|
||||
}
|
||||
|
||||
// Vertex buffer
|
||||
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
|
||||
dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
|
||||
dirty_flags.vertex_array |=
|
||||
1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
|
||||
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
|
||||
dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
|
||||
}
|
||||
}
|
||||
|
||||
switch (method) {
|
||||
|
@ -258,6 +272,7 @@ void Maxwell3D::ProcessQueryGet() {
|
|||
query_result.timestamp = CoreTiming::GetTicks();
|
||||
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
|
||||
}
|
||||
dirty_flags.OnMemoryWrite();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -334,6 +349,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
|
|||
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
|
||||
|
||||
Memory::Write32(*address, value);
|
||||
dirty_flags.OnMemoryWrite();
|
||||
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
|
||||
|
|
|
@ -1014,6 +1014,11 @@ public:
|
|||
|
||||
/// Tracks which pieces of guest vertex state have changed since the rasterizer last consumed them.
/// Both members start fully dirty so the first draw sets everything up.
struct DirtyFlags {
|
||||
/// Set when a register inside the vertex_attrib_format range is written with a new value.
bool vertex_attrib_format = true;
|
||||
/// Bitmask with one bit per vertex array index (bit N <=> vertex array N). A bit is set when the
/// vertex_array, vertex_array_limit or instanced_arrays register of that index changes value.
/// The rasterizer skips arrays whose bit is clear and resets the mask to 0 after uploading.
u32 vertex_array = 0xFFFFFFFF;
|
||||
|
||||
/// Marks every vertex array as dirty. Called after writes to guest memory, since any vertex
/// buffer contents may have changed. Does not modify vertex_attrib_format.
void OnMemoryWrite() {
|
||||
vertex_array = 0xFFFFFFFF;
|
||||
}
|
||||
};
|
||||
|
||||
DirtyFlags dirty_flags;
|
||||
|
|
|
@ -2,7 +2,9 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
|
|||
return;
|
||||
}
|
||||
|
||||
// All copies here update the main memory, so mark all rasterizer states as invalid.
|
||||
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
|
||||
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
|
||||
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
|
||||
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
|
||||
|
|
|
@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
|
|||
return std::make_tuple(uploaded_ptr, uploaded_offset);
|
||||
}
|
||||
|
||||
void OGLBufferCache::Map(std::size_t max_size) {
|
||||
bool OGLBufferCache::Map(std::size_t max_size) {
|
||||
bool invalidate;
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
|
||||
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
|
||||
|
@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
|
|||
if (invalidate) {
|
||||
InvalidateAll();
|
||||
}
|
||||
return invalidate;
|
||||
}
|
||||
|
||||
void OGLBufferCache::Unmap() {
|
||||
|
|
|
@ -50,7 +50,7 @@ public:
|
|||
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
|
||||
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
|
||||
|
||||
void Map(std::size_t max_size);
|
||||
bool Map(std::size_t max_size);
|
||||
void Unmap();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
|
|
|
@ -183,15 +183,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
|
|||
}
|
||||
state.draw.vertex_array = VAO.handle;
|
||||
state.ApplyVertexBufferState();
|
||||
|
||||
// Rebinding the VAO invalidates the vertex buffer bindings.
|
||||
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexBuffer() {
|
||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
if (!gpu.dirty_flags.vertex_array)
|
||||
return;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||
|
||||
// Upload all guest vertex arrays sequentially to our buffer
|
||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
if (~gpu.dirty_flags.vertex_array & (1u << index))
|
||||
continue;
|
||||
|
||||
const auto& vertex_array = regs.vertex_array[index];
|
||||
if (!vertex_array.IsEnabled())
|
||||
continue;
|
||||
|
@ -218,6 +228,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
|
|||
|
||||
// Implicitly set by glBindVertexBuffer. Stupid glstate handling...
|
||||
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
||||
|
||||
gpu.dirty_flags.vertex_array = 0;
|
||||
}
|
||||
|
||||
DrawParameters RasterizerOpenGL::SetupDraw() {
|
||||
|
@ -575,7 +587,7 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
return;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
ScopeAcquireGLContext acquire_context{emu_window};
|
||||
|
@ -626,7 +638,11 @@ void RasterizerOpenGL::DrawArrays() {
|
|||
// Add space for at least 18 constant buffers
|
||||
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
|
||||
|
||||
buffer_cache.Map(buffer_size);
|
||||
bool invalidate = buffer_cache.Map(buffer_size);
|
||||
if (invalidate) {
|
||||
// As all cached buffers are invalidated, we need to recheck their state.
|
||||
// Buffer contents are tracked by the per-array bitmask, so flag every vertex array for re-upload.
// (vertex_attrib_format is a bool and describes the attribute layout, not the buffer data.)
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
SetupVertexFormat();
|
||||
SetupVertexBuffer();
|
||||
|
|
Loading…
Reference in a new issue