1
1
Fork 0
forked from suyu/suyu

Merge pull request #761 from bunnei/improve-raster-cache

Improvements to rasterizer cache
This commit is contained in:
bunnei 2018-07-21 20:28:53 -07:00 committed by GitHub
commit 4cd5df95d6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 157 additions and 72 deletions

View file

@ -487,7 +487,12 @@ public:
};
} rt_control;
INSERT_PADDING_WORDS(0x2B);
INSERT_PADDING_WORDS(0x2);
u32 zeta_width;
u32 zeta_height;
INSERT_PADDING_WORDS(0x27);
u32 depth_test_enable;
@ -540,7 +545,11 @@ public:
u32 vb_element_base;
INSERT_PADDING_WORDS(0x49);
INSERT_PADDING_WORDS(0x40);
u32 zeta_enable;
INSERT_PADDING_WORDS(0x8);
struct {
u32 tsc_address_high;
@ -865,6 +874,8 @@ ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@ -874,6 +885,7 @@ ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil, 0x4E0);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(stencil_two_side, 0x565);

View file

@ -387,7 +387,7 @@ void RasterizerOpenGL::Clear() {
}
if (regs.clear_buffers.Z) {
clear_mask |= GL_DEPTH_BUFFER_BIT;
use_depth_fb = true;
use_depth_fb = regs.zeta_enable != 0;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@ -413,11 +413,13 @@ void RasterizerOpenGL::Clear() {
glClear(clear_mask);
// Mark framebuffer surfaces as dirty
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}
@ -431,7 +433,7 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context;
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(true, regs.zeta.Address() != 0);
ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0);
SyncDepthTestState();
SyncBlendState();
@ -520,11 +522,13 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
// Mark framebuffer surfaces as dirty
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}

View file

@ -65,9 +65,9 @@ struct FormatTuple {
return params;
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
SurfaceParams params{};
params.addr = zeta_address;
@ -77,9 +77,9 @@ struct FormatTuple {
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.size_in_bytes = params.SizeInBytes();
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.size_in_bytes = params.SizeInBytes();
return params;
}
@ -254,6 +254,60 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
cur_state.Apply();
}
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
GLuint read_fb_handle, GLuint draw_fb_handle) {
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.draw.read_framebuffer = read_fb_handle;
state.draw.draw_framebuffer = draw_fb_handle;
state.Apply();
u32 buffers{};
if (type == SurfaceType::ColorTexture) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
buffers = GL_COLOR_BUFFER_BIT;
} else if (type == SurfaceType::Depth) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
buffers = GL_DEPTH_BUFFER_BIT;
} else if (type == SurfaceType::DepthStencil) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
src_tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
dst_tex, 0);
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}
glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
return true;
}
CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
texture.Create();
const auto& rect{params.GetRect()};
@ -519,8 +573,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
}
if (using_depth_fb) {
depth_params =
SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
regs.zeta.Address(), regs.zeta.format);
}
MathUtil::Rectangle<u32> color_rect{};
@ -565,17 +619,9 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
}
void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
if (Settings::values.use_accurate_framebuffers) {
// If enabled, always flush dirty surfaces
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
} else {
// Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
// and flushes are very slow and do not seem to improve accuracy
const auto& params{surface->GetSurfaceParams()};
Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
}
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
@ -588,25 +634,53 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)
return {};
// Check for an exact match in existing surfaces
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
// Look up surface in the cache based on address
const auto& search{surface_cache.find(params.addr)};
Surface surface;
if (search != surface_cache.end()) {
surface = search->second;
if (Settings::values.use_accurate_framebuffers) {
// Reload the surface from Switch memory
LoadSurface(surface);
// If use_accurate_framebuffers is enabled, always load from memory
FlushSurface(surface);
UnregisterSurface(surface);
} else if (surface->GetSurfaceParams() != params) {
// If surface parameters changed, recreate the surface from the old one
return RecreateSurface(surface, params);
} else {
// Use the cached surface as-is
return surface;
}
} else {
surface = std::make_shared<CachedSurface>(params);
RegisterSurface(surface);
LoadSurface(surface);
}
// No surface found - create a new one
surface = std::make_shared<CachedSurface>(params);
RegisterSurface(surface);
LoadSurface(surface);
return surface;
}
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
const SurfaceParams& new_params) {
// Verify surface is compatible for blitting
const auto& params{surface->GetSurfaceParams()};
ASSERT(params.type == new_params.type);
ASSERT(params.pixel_format == new_params.pixel_format);
ASSERT(params.component_type == new_params.component_type);
// Create a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{std::make_shared<CachedSurface>(new_params)};
BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
new_surface->GetSurfaceParams().GetRect(), params.type, read_framebuffer.handle,
draw_framebuffer.handle);
// Update cache accordingly
UnregisterSurface(surface);
RegisterSurface(new_surface);
return new_surface;
}
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
@ -652,22 +726,20 @@ void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size)
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
const auto& search{surface_cache.find(params.addr)};
if (search != surface_cache.end()) {
// Registered already
return;
}
surface_cache[surface_key] = surface;
surface_cache[params.addr] = surface;
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
}
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
const auto& search{surface_cache.find(params.addr)};
if (search == surface_cache.end()) {
// Unregistered already

View file

@ -10,7 +10,6 @@
#include <vector>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@ -137,6 +136,7 @@ struct SurfaceParams {
ASSERT(static_cast<size_t>(format) < bpp_table.size());
return bpp_table[static_cast<size_t>(format)];
}
u32 GetFormatBpp() const {
return GetFormatBpp(pixel_format);
}
@ -365,9 +365,21 @@ struct SurfaceParams {
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format);
bool operator==(const SurfaceParams& other) const {
return std::tie(addr, is_tiled, block_height, pixel_format, component_type, type, width,
height, unaligned_height, size_in_bytes) ==
std::tie(other.addr, other.is_tiled, other.block_height, other.pixel_format,
other.component_type, other.type, other.width, other.height,
other.unaligned_height, other.size_in_bytes);
}
bool operator!=(const SurfaceParams& other) const {
return !operator==(other);
}
Tegra::GPUVAddr addr;
bool is_tiled;
@ -381,24 +393,6 @@ struct SurfaceParams {
size_t size_in_bytes;
};
/// Hashable variation of SurfaceParams, used for a key in the surface cache
struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
static SurfaceKey Create(const SurfaceParams& params) {
SurfaceKey res;
res.state = params;
return res;
}
};
namespace std {
template <>
struct hash<SurfaceKey> {
size_t operator()(const SurfaceKey& k) const {
return k.Hash();
}
};
} // namespace std
class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
@ -444,8 +438,8 @@ public:
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const MathUtil::Rectangle<s32>& viewport);
/// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
void MarkSurfaceAsDirty(const Surface& surface);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
/// Tries to find a framebuffer GPU address based on the provided CPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
@ -460,6 +454,9 @@ private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params);
/// Recreates a surface with new parameters
Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@ -469,7 +466,7 @@ private:
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
std::unordered_map<SurfaceKey, Surface> surface_cache;
std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;
PageMap cached_pages;
OGLFramebuffer read_framebuffer;