3
0
Fork 0
forked from suyu/suyu

Merge pull request #595 from bunnei/raster-cache

Rewrite the OpenGL rasterizer cache
This commit is contained in:
bunnei 2018-06-29 14:07:28 -04:00 committed by GitHub
commit 50ef2beb58
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 444 additions and 1473 deletions

View file

@ -129,6 +129,7 @@ struct Values {
// Renderer // Renderer
float resolution_factor; float resolution_factor;
bool toggle_framelimit; bool toggle_framelimit;
bool use_accurate_framebuffers;
float bg_red; float bg_red;
float bg_green; float bg_green;

View file

@ -161,6 +161,8 @@ TelemetrySession::TelemetrySession() {
Settings::values.resolution_factor); Settings::values.resolution_factor);
AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit", AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",
Settings::values.toggle_framelimit); Settings::values.toggle_framelimit);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateFramebuffers",
Settings::values.use_accurate_framebuffers);
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
Settings::values.use_docked_mode); Settings::values.use_docked_mode);
} }

View file

@ -321,6 +321,24 @@ public:
INSERT_PADDING_WORDS(1); INSERT_PADDING_WORDS(1);
}; };
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
u32 block_dimensions;
u32 array_mode;
u32 layer_stride;
u32 base_layer;
INSERT_PADDING_WORDS(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
};
union { union {
struct { struct {
INSERT_PADDING_WORDS(0x45); INSERT_PADDING_WORDS(0x45);
@ -333,23 +351,7 @@ public:
INSERT_PADDING_WORDS(0x1B8); INSERT_PADDING_WORDS(0x1B8);
struct { RenderTargetConfig rt[NumRenderTargets];
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
u32 block_dimensions;
u32 array_mode;
u32 layer_stride;
u32 base_layer;
INSERT_PADDING_WORDS(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} rt[NumRenderTargets];
struct { struct {
f32 scale_x; f32 scale_x;

View file

@ -51,9 +51,8 @@ public:
} }
/// Attempt to use a faster method to display the framebuffer to screen /// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
VAddr framebuffer_addr, u32 pixel_stride, u32 pixel_stride, ScreenInfo& screen_info) {
ScreenInfo& screen_info) {
return false; return false;
} }

View file

@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
u64 size = end - start + 1; u64 size = end - start + 1;
// Copy vertex array data // Copy vertex array data
res_cache.FlushRegion(start, size, nullptr);
Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
// Bind the vertex array to the buffer at the current offset. // Bind the vertex array to the buffer at the current offset.
@ -325,29 +324,22 @@ void RasterizerOpenGL::DrawArrays() {
std::tie(color_surface, depth_surface, surfaces_rect) = std::tie(color_surface, depth_surface, surfaces_rect) =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect);
const u16 res_scale = color_surface != nullptr
? color_surface->res_scale
: (depth_surface == nullptr ? 1u : depth_surface->res_scale);
MathUtil::Rectangle<u32> draw_rect{ MathUtil::Rectangle<u32> draw_rect{
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
surfaces_rect.left, surfaces_rect.right)), // Left
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
surfaces_rect.bottom, surfaces_rect.top)), // Top
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
surfaces_rect.left, surfaces_rect.right)), // Right
static_cast<u32>( static_cast<u32>(
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
surfaces_rect.left, surfaces_rect.right)), // Left surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
static_cast<u32>(
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
surfaces_rect.bottom, surfaces_rect.top)), // Top
static_cast<u32>(
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
surfaces_rect.left, surfaces_rect.right)), // Right
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
viewport_rect.bottom * res_scale,
surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
// Bind the framebuffer surfaces // Bind the framebuffer surfaces
BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
// Sync the viewport // Sync the viewport
SyncViewport(surfaces_rect, res_scale); SyncViewport(surfaces_rect);
// Sync the blend state registers // Sync the blend state registers
SyncBlendState(); SyncBlendState();
@ -442,19 +434,11 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply(); state.Apply();
// Mark framebuffer surfaces as dirty // Mark framebuffer surfaces as dirty
MathUtil::Rectangle<u32> draw_rect_unscaled{
draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
draw_rect.bottom / res_scale};
if (color_surface != nullptr && write_color_fb) { if (color_surface != nullptr && write_color_fb) {
auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); res_cache.MarkSurfaceAsDirty(color_surface);
res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
color_surface);
} }
if (depth_surface != nullptr && write_depth_fb) { if (depth_surface != nullptr && write_depth_fb) {
auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); res_cache.MarkSurfaceAsDirty(depth_surface);
res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
depth_surface);
} }
} }
@ -462,7 +446,7 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
void RasterizerOpenGL::FlushAll() { void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushAll(); res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
} }
void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
@ -472,13 +456,13 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size, nullptr); res_cache.InvalidateRegion(addr, size);
} }
void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size); res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size, nullptr); res_cache.InvalidateRegion(addr, size);
} }
bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
@ -497,45 +481,28 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {
return true; return true;
} }
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride, VAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) { ScreenInfo& screen_info) {
if (framebuffer_addr == 0) { if (!framebuffer_addr) {
return false; return {};
} }
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
SurfaceParams src_params; const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
src_params.cpu_addr = framebuffer_addr; if (!surface) {
src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); return {};
src_params.width = std::min(framebuffer.width, pixel_stride);
src_params.height = framebuffer.height;
src_params.stride = pixel_stride;
src_params.is_tiled = true;
src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
src_params.pixel_format =
SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
src_params.component_type =
SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
src_params.UpdateParams();
MathUtil::Rectangle<u32> src_rect;
Surface src_surface;
std::tie(src_surface, src_rect) =
res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
if (src_surface == nullptr) {
return false;
} }
u32 scaled_width = src_surface->GetScaledWidth(); // Verify that the cached surface is the same size and format as the requested framebuffer
u32 scaled_height = src_surface->GetScaledHeight(); const auto& params{surface->GetSurfaceParams()};
const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
screen_info.display_texcoords = MathUtil::Rectangle<float>( screen_info.display_texture = surface->Texture().handle;
(float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
(float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
screen_info.display_texture = src_surface->texture.handle;
return true; return true;
} }
@ -674,7 +641,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture); Surface surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) { if (surface != nullptr) {
state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
state.texture_units[current_bindpoint].swizzle.r = state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source); MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g = state.texture_units[current_bindpoint].swizzle.g =
@ -700,16 +667,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
state.Apply(); state.Apply();
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
color_surface != nullptr ? color_surface->texture.handle : 0, 0); color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
if (depth_surface != nullptr) { if (depth_surface != nullptr) {
if (has_stencil) { if (has_stencil) {
// attach both depth and stencil // attach both depth and stencil
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
depth_surface->texture.handle, 0); depth_surface->Texture().handle, 0);
} else { } else {
// attach depth // attach depth
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
depth_surface->texture.handle, 0); depth_surface->Texture().handle, 0);
// clear stencil attachment // clear stencil attachment
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
} }
@ -720,14 +687,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
} }
} }
void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
} }
void RasterizerOpenGL::SyncClipEnabled() { void RasterizerOpenGL::SyncClipEnabled() {

View file

@ -109,7 +109,7 @@ private:
u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
/// Syncs the viewport to match the guest state /// Syncs the viewport to match the guest state
void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
/// Syncs the clip enabled status to match the guest state /// Syncs the clip enabled status to match the guest state
void SyncClipEnabled(); void SyncClipEnabled();

File diff suppressed because it is too large Load diff

View file

@ -1,57 +1,26 @@
// Copyright 2015 Citra Emulator Project // Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#pragma once #pragma once
#include <array> #include <array>
#include <map>
#include <memory> #include <memory>
#include <set> #include <vector>
#include <tuple>
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#endif
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#include <boost/optional.hpp>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h" #include "common/math_util.h"
#include "video_core/gpu.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/textures/texture.h" #include "video_core/textures/texture.h"
struct CachedSurface; class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>; using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSet = std::set<Surface>;
using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>;
using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>;
using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>;
using SurfaceInterval = SurfaceCache::interval_type;
static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
"incorrect interval types");
using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u64, int>; using PageMap = boost::icl::interval_map<u64, int>;
enum class ScaleMatch {
Exact, // only accept same res scale
Upscale, // only allow higher scale than params
Ignore // accept every scaled res
};
struct SurfaceParams { struct SurfaceParams {
enum class PixelFormat { enum class PixelFormat {
ABGR8 = 0, ABGR8 = 0,
@ -93,10 +62,10 @@ struct SurfaceParams {
/** /**
* Gets the compression factor for the specified PixelFormat. This applies to just the * Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a * "compressed width" and "compressed height", not the overall compression factor of a
* compressed image. This is used for maintaining proper surface sizes for compressed texture * compressed image. This is used for maintaining proper surface sizes for compressed
* formats. * texture formats.
*/ */
static constexpr u32 GetCompresssionFactor(PixelFormat format) { static constexpr u32 GetCompressionFactor(PixelFormat format) {
if (format == PixelFormat::Invalid) if (format == PixelFormat::Invalid)
return 0; return 0;
@ -112,15 +81,12 @@ struct SurfaceParams {
4, // DXT23 4, // DXT23
4, // DXT45 4, // DXT45
4, // DXN1 4, // DXN1
1, // ASTC_2D_4X4 4, // ASTC_2D_4X4
}}; }};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
return compression_factor_table[static_cast<size_t>(format)]; return compression_factor_table[static_cast<size_t>(format)];
} }
u32 GetCompresssionFactor() const {
return GetCompresssionFactor(pixel_format);
}
static constexpr u32 GetFormatBpp(PixelFormat format) { static constexpr u32 GetFormatBpp(PixelFormat format) {
if (format == PixelFormat::Invalid) if (format == PixelFormat::Invalid)
@ -165,25 +131,6 @@ struct SurfaceParams {
} }
} }
static bool IsFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return true;
default:
return false;
}
}
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) { static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) {
// TODO(Subv): Properly implement this // TODO(Subv): Properly implement this
switch (format) { switch (format) {
@ -276,36 +223,16 @@ struct SurfaceParams {
} }
} }
static ComponentType ComponentTypeFromGPUPixelFormat( static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
Tegra::FramebufferConfig::PixelFormat format) {
switch (format) { switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8: case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return ComponentType::UNorm; return PixelFormat::ABGR8;
default: default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE(); UNREACHABLE();
} }
} }
static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b);
if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
return true;
}
if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
return true;
}
if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
return true;
}
return false;
}
static SurfaceType GetFormatType(PixelFormat pixel_format) { static SurfaceType GetFormatType(PixelFormat pixel_format) {
if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
return SurfaceType::ColorTexture; return SurfaceType::ColorTexture;
@ -317,168 +244,101 @@ struct SurfaceParams {
return SurfaceType::Invalid; return SurfaceType::Invalid;
} }
/// Update the params "size", "end" and "type" from the already set "addr", "width", "height" /// Returns the rectangle corresponding to this surface
/// and "pixel_format" MathUtil::Rectangle<u32> GetRect() const;
void UpdateParams() {
if (stride == 0) { /// Returns the size of this surface in bytes, adjusted for compression
stride = width; size_t SizeInBytes() const {
} const u32 compression_factor{GetCompressionFactor(pixel_format)};
type = GetFormatType(pixel_format); ASSERT(width % compression_factor == 0);
size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) ASSERT(height % compression_factor == 0);
: BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); return (width / compression_factor) * (height / compression_factor) *
end = addr + size; GetFormatBpp(pixel_format) / CHAR_BIT;
}
SurfaceInterval GetInterval() const {
return SurfaceInterval::right_open(addr, end);
}
// Returns the outer rectangle containing "interval"
SurfaceParams FromInterval(SurfaceInterval interval) const;
SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const;
// Returns the region of the biggest valid rectangle within interval
SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
/**
* Gets the actual width (in pixels) of the surface. This is provided because `width` is used
* for tracking the surface region in memory, which may be compressed for certain formats. In
* this scenario, `width` is actually the compressed width.
*/
u32 GetActualWidth() const {
return width * GetCompresssionFactor();
}
/**
* Gets the actual height (in pixels) of the surface. This is provided because `height` is used
* for tracking the surface region in memory, which may be compressed for certain formats. In
* this scenario, `height` is actually the compressed height.
*/
u32 GetActualHeight() const {
return height * GetCompresssionFactor();
}
u32 GetScaledWidth() const {
return width * res_scale;
}
u32 GetScaledHeight() const {
return height * res_scale;
}
MathUtil::Rectangle<u32> GetRect() const {
return {0, height, width, 0};
}
MathUtil::Rectangle<u32> GetScaledRect() const {
return {0, GetScaledHeight(), GetScaledWidth(), 0};
}
u64 PixelsInBytes(u64 size) const {
return size * CHAR_BIT / GetFormatBpp(pixel_format);
}
u64 BytesInPixels(u64 pixels) const {
return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
} }
/// Returns the CPU virtual address for this surface
VAddr GetCpuAddr() const; VAddr GetCpuAddr() const;
bool ExactMatch(const SurfaceParams& other_surface) const; /// Returns true if the specified region overlaps with this surface's region in Switch memory
bool CanSubRect(const SurfaceParams& sub_surface) const; bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
bool CanExpand(const SurfaceParams& expanded_surface) const; return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
bool CanTexCopy(const SurfaceParams& texcopy_params) const; }
MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; /// Creates SurfaceParams from a texture configuration
MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
Tegra::GPUVAddr addr = 0; /// Creates SurfaceParams from a framebuffer configuration
Tegra::GPUVAddr end = 0; static SurfaceParams CreateForFramebuffer(
boost::optional<VAddr> cpu_addr; const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
u64 size = 0;
u32 width = 0; Tegra::GPUVAddr addr;
u32 height = 0; bool is_tiled;
u32 stride = 0; u32 block_height;
u32 block_height = 0; PixelFormat pixel_format;
u16 res_scale = 1; ComponentType component_type;
SurfaceType type;
bool is_tiled = false; u32 width;
PixelFormat pixel_format = PixelFormat::Invalid; u32 height;
SurfaceType type = SurfaceType::Invalid; u32 unaligned_height;
ComponentType component_type = ComponentType::Invalid; size_t size_in_bytes;
}; };
struct CachedSurface : SurfaceParams { /// Hashable variation of SurfaceParams, used for a key in the surface cache
bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; static SurfaceKey Create(const SurfaceParams& params) {
SurfaceKey res;
res.state = params;
return res;
}
};
bool IsRegionValid(SurfaceInterval interval) const { namespace std {
return (invalid_regions.find(interval) == invalid_regions.end()); template <>
struct hash<SurfaceKey> {
size_t operator()(const SurfaceKey& k) const {
return k.Hash();
}
};
} // namespace std
class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
const OGLTexture& Texture() const {
return texture;
} }
bool IsSurfaceFullyInvalid() const { static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); if (format == SurfaceParams::PixelFormat::Invalid)
}
bool registered = false;
SurfaceRegions invalid_regions;
u64 fill_size = 0; /// Number of bytes to read from fill_data
std::array<u8, 4> fill_data;
OGLTexture texture;
static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0; return 0;
return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
} }
std::unique_ptr<u8[]> gl_buffer; const SurfaceParams& GetSurfaceParams() const {
size_t gl_buffer_size = 0; return params;
}
// Read/Write data in Switch memory to/from gl_buffer // Read/Write data in Switch memory to/from gl_buffer
void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); void LoadGLBuffer();
void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); void FlushGLBuffer();
// Upload/Download data in gl_buffer in/to this surface's texture // Upload/Download data in gl_buffer in/to this surface's texture
void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
GLuint draw_fb_handle); void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
GLuint draw_fb_handle); private:
OGLTexture texture;
std::vector<u8> gl_buffer;
SurfaceParams params;
}; };
class RasterizerCacheOpenGL : NonCopyable { class RasterizerCacheOpenGL final : NonCopyable {
public: public:
RasterizerCacheOpenGL(); RasterizerCacheOpenGL();
~RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL();
/// Blit one surface's texture to another
bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect,
const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect);
void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect,
GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect);
/// Copy one surface's region to another
void CopySurface(const Surface& src_surface, const Surface& dst_surface,
SurfaceInterval copy_interval);
/// Load a texture from Switch memory to OpenGL and cache it (if not already cached)
Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Tries to find a framebuffer GPU address based on the provided CPU address
boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const;
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// Switch memory to OpenGL and caches it (if not already cached)
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Get a surface based on the texture configuration /// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
@ -486,29 +346,21 @@ public:
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const MathUtil::Rectangle<s32>& viewport); const MathUtil::Rectangle<s32>& viewport);
/// Get a surface that matches the fill config /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
Surface GetFillSurface(const void* config); void MarkSurfaceAsDirty(const Surface& surface);
/// Get a surface that matches a "texture copy" display transfer config /// Tries to find a framebuffer surface based on the provided CPU address
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
/// Write any cached resources overlapping the region back to memory (if dirty) /// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); void FlushRegion(Tegra::GPUVAddr addr, size_t size);
/// Mark region as being invalidated by region_owner (nullptr if Switch memory) /// Mark the specified region as being invalidated
void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
/// Flush all cached resources tracked by this cache manager
void FlushAll();
private: private:
void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params);
/// Update surface's texture for given region when necessary
void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size);
/// Create a new surface
Surface CreateSurface(const SurfaceParams& params);
/// Register surface into the cache /// Register surface into the cache
void RegisterSurface(const Surface& surface); void RegisterSurface(const Surface& surface);
@ -519,18 +371,9 @@ private:
/// Increase/decrease the number of surfaces in pages touching the specified region /// Increase/decrease the number of surfaces in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
SurfaceCache surface_cache; std::unordered_map<SurfaceKey, Surface> surface_cache;
PageMap cached_pages; PageMap cached_pages;
SurfaceMap dirty_regions;
SurfaceSet remove_surfaces;
OGLFramebuffer read_framebuffer; OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer; OGLFramebuffer draw_framebuffer;
OGLVertexArray attributeless_vao;
OGLBuffer d24s8_abgr_buffer;
GLsizeiptr d24s8_abgr_buffer_size;
OGLProgram d24s8_abgr_shader;
GLint d24s8_abgr_tbo_size_u_id;
GLint d24s8_abgr_viewport_u_id;
}; };

View file

@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
screen_info)) { screen_info)) {
// Reset the screen info's display texture to its own permanent texture // Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle; screen_info.display_texture = screen_info.texture.resource.handle;
screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
Memory::FlushMode::Flush); Memory::FlushMode::Flush);

View file

@ -27,7 +27,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen /// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo { struct ScreenInfo {
GLuint display_texture; GLuint display_texture;
MathUtil::Rectangle<float> display_texcoords; const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture; TextureInfo texture;
}; };

View file

@ -84,6 +84,8 @@ void Config::ReadValues() {
qt_config->beginGroup("Renderer"); qt_config->beginGroup("Renderer");
Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool();
Settings::values.use_accurate_framebuffers =
qt_config->value("use_accurate_framebuffers", false).toBool();
Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@ -184,6 +186,7 @@ void Config::SaveValues() {
qt_config->beginGroup("Renderer"); qt_config->beginGroup("Renderer");
qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit); qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit);
qt_config->setValue("use_accurate_framebuffers", Settings::values.use_accurate_framebuffers);
// Cast to double because Qt's written float values are not human-readable // Cast to double because Qt's written float values are not human-readable
qt_config->setValue("bg_red", (double)Settings::values.bg_red); qt_config->setValue("bg_red", (double)Settings::values.bg_red);

View file

@ -59,11 +59,13 @@ void ConfigureGraphics::setConfiguration() {
ui->resolution_factor_combobox->setCurrentIndex( ui->resolution_factor_combobox->setCurrentIndex(
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit); ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit);
ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers);
} }
void ConfigureGraphics::applyConfiguration() { void ConfigureGraphics::applyConfiguration() {
Settings::values.resolution_factor = Settings::values.resolution_factor =
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked(); Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked();
Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked();
Settings::Apply(); Settings::Apply();
} }

View file

@ -29,6 +29,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="use_accurate_framebuffers">
<property name="text">
<string>Use accurate framebuffers (slow)</string>
</property>
</widget>
</item>
<item> <item>
<layout class="QHBoxLayout" name="horizontalLayout"> <layout class="QHBoxLayout" name="horizontalLayout">
<item> <item>

View file

@ -98,6 +98,8 @@ void Config::ReadValues() {
(float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);
Settings::values.toggle_framelimit = Settings::values.toggle_framelimit =
sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true);
Settings::values.use_accurate_framebuffers =
sdl2_config->GetBoolean("Renderer", "use_accurate_framebuffers", false);
Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);

View file

@ -102,6 +102,10 @@ resolution_factor =
# 0 (default): Off, 1: On # 0 (default): Off, 1: On
use_vsync = use_vsync =
# Whether to use accurate framebuffers
# 0 (default): Off (fast), 1 : On (slow)
use_accurate_framebuffers =
# The clear color for the renderer. What shows up on the sides of the bottom screen. # The clear color for the renderer. What shows up on the sides of the bottom screen.
# Must be in range of 0.0-1.0. Defaults to 1.0 for all. # Must be in range of 0.0-1.0. Defaults to 1.0 for all.
bg_red = bg_red =