forked from suyu/suyu
f465e4aaf2
We uploaded the wrong data before, so while the offset on the host GPU pointer may work for the first vertices, the last ones run out of bounds. Let's just offset the upload instead.
886 lines
35 KiB
C++
886 lines
35 KiB
C++
// Copyright 2015 Citra Emulator Project
|
|
// Licensed under GPLv2 or any later version
|
|
// Refer to the license.txt file included.
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <tuple>
|
|
#include <utility>
|
|
#include <glad/glad.h>
|
|
#include "common/alignment.h"
|
|
#include "common/assert.h"
|
|
#include "common/logging/log.h"
|
|
#include "common/math_util.h"
|
|
#include "common/microprofile.h"
|
|
#include "common/scope_exit.h"
|
|
#include "core/core.h"
|
|
#include "core/frontend/emu_window.h"
|
|
#include "core/hle/kernel/process.h"
|
|
#include "core/settings.h"
|
|
#include "video_core/engines/maxwell_3d.h"
|
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
|
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
|
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
|
#include "video_core/renderer_opengl/renderer_opengl.h"
|
|
#include "video_core/video_core.h"
|
|
|
|
namespace OpenGL {
|
|
|
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
|
using PixelFormat = SurfaceParams::PixelFormat;
|
|
using SurfaceType = SurfaceParams::SurfaceType;
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
|
|
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
|
|
|
// Constructs the rasterizer: creates one sampler object per texture unit, detects the optional
// OpenGL extensions this backend cares about, creates the render framebuffer and the shader
// program manager, and applies the initial OpenGL state.
//
// window: emulator window, stored for later GL context acquisition.
// info:   screen info, stored so the displayed texture can be updated later.
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
    : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
    // Create sampler objects and attach each one to its texture unit in the tracked state
    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
        texture_samplers[i].Create();
        state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
    }

    // Zero-initialized so that a failed query results in an empty extension list instead of
    // looping over an indeterminate count.
    GLint ext_num = 0;
    glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
    for (GLint i = 0; i < ext_num; i++) {
        const auto* const extension_name =
            reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i));
        if (extension_name == nullptr) {
            // glGetStringi returns null on error; building a string_view from null is undefined
            // behavior, so skip the entry.
            continue;
        }
        const std::string_view extension{extension_name};

        if (extension == "GL_ARB_direct_state_access") {
            has_ARB_direct_state_access = true;
        } else if (extension == "GL_ARB_multi_bind") {
            has_ARB_multi_bind = true;
        } else if (extension == "GL_ARB_separate_shader_objects") {
            has_ARB_separate_shader_objects = true;
        } else if (extension == "GL_ARB_vertex_attrib_binding") {
            has_ARB_vertex_attrib_binding = true;
        }
    }

    // Separate shader objects are the only extension asserted here
    ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");

    // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
    // NOTE(review): PICA is not referenced anywhere else in this file; presumably this line was
    // inherited from another project - confirm it is still wanted.
    state.clip_distance[0] = true;

    // Create render framebuffer
    framebuffer.Create();

    shader_program_manager = std::make_unique<GLShader::ProgramManager>();
    state.draw.shader_program = 0;
    state.Apply();

    glEnable(GL_BLEND);

    // Cached for aligning uniform buffer uploads into the stream buffer
    glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);

    LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
}
|
|
|
|
// Nothing to release explicitly here; the members clean up after themselves.
RasterizerOpenGL::~RasterizerOpenGL() = default;
|
|
|
|
void RasterizerOpenGL::SetupVertexArrays() {
|
|
MICROPROFILE_SCOPE(OpenGL_VAO);
|
|
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
|
const auto& regs = gpu.regs;
|
|
|
|
auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
|
|
auto& VAO = iter->second;
|
|
|
|
if (is_cache_miss) {
|
|
VAO.Create();
|
|
state.draw.vertex_array = VAO.handle;
|
|
state.Apply();
|
|
|
|
// The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
|
|
// around.
|
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());
|
|
|
|
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
|
|
// Enables the first 16 vertex attributes always, as we don't know which ones are actually
|
|
// used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
|
|
// for now to avoid OpenGL errors.
|
|
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
|
|
// assume every shader uses them all.
|
|
for (unsigned index = 0; index < 16; ++index) {
|
|
const auto& attrib = regs.vertex_attrib_format[index];
|
|
|
|
// Ignore invalid attributes.
|
|
if (!attrib.IsValid())
|
|
continue;
|
|
|
|
const auto& buffer = regs.vertex_array[attrib.buffer];
|
|
LOG_TRACE(HW_GPU,
|
|
"vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
|
|
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
|
|
attrib.offset.Value(), attrib.IsNormalized());
|
|
|
|
ASSERT(buffer.IsEnabled());
|
|
|
|
glEnableVertexAttribArray(index);
|
|
if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
|
|
attrib.type ==
|
|
Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
|
|
glVertexAttribIFormat(index, attrib.ComponentCount(),
|
|
MaxwellToGL::VertexType(attrib), attrib.offset);
|
|
} else {
|
|
glVertexAttribFormat(index, attrib.ComponentCount(),
|
|
MaxwellToGL::VertexType(attrib),
|
|
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
|
|
}
|
|
glVertexAttribBinding(index, attrib.buffer);
|
|
}
|
|
}
|
|
state.draw.vertex_array = VAO.handle;
|
|
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
|
state.Apply();
|
|
|
|
// Upload all guest vertex arrays sequentially to our buffer
|
|
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
|
const auto& vertex_array = regs.vertex_array[index];
|
|
if (!vertex_array.IsEnabled())
|
|
continue;
|
|
|
|
Tegra::GPUVAddr start = vertex_array.StartAddress();
|
|
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
|
|
|
ASSERT(end > start);
|
|
const u64 size = end - start + 1;
|
|
const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
|
|
|
|
// Bind the vertex array to the buffer at the current offset.
|
|
glBindVertexBuffer(index, buffer_cache.GetHandle(), vertex_buffer_offset,
|
|
vertex_array.stride);
|
|
|
|
if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
|
|
// Enable vertex buffer instancing with the specified divisor.
|
|
glVertexBindingDivisor(index, vertex_array.divisor);
|
|
} else {
|
|
// Disable the vertex buffer instancing.
|
|
glVertexBindingDivisor(index, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
void RasterizerOpenGL::SetupShaders() {
|
|
MICROPROFILE_SCOPE(OpenGL_Shader);
|
|
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
|
|
|
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
|
|
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
|
|
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
|
|
u32 current_texture_bindpoint = 0;
|
|
|
|
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
|
const auto& shader_config = gpu.regs.shader_config[index];
|
|
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
|
|
|
|
// Skip stages that are not enabled
|
|
if (!gpu.regs.IsShaderConfigEnabled(index)) {
|
|
continue;
|
|
}
|
|
|
|
const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
|
|
|
|
GLShader::MaxwellUniformData ubo{};
|
|
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
|
|
const GLintptr offset = buffer_cache.UploadHostMemory(
|
|
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
|
|
|
|
// Bind the buffer
|
|
glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
|
|
|
|
Shader shader{shader_cache.GetStageProgram(program)};
|
|
|
|
switch (program) {
|
|
case Maxwell::ShaderProgram::VertexA:
|
|
case Maxwell::ShaderProgram::VertexB: {
|
|
shader_program_manager->UseProgrammableVertexShader(shader->GetProgramHandle());
|
|
break;
|
|
}
|
|
case Maxwell::ShaderProgram::Fragment: {
|
|
shader_program_manager->UseProgrammableFragmentShader(shader->GetProgramHandle());
|
|
break;
|
|
}
|
|
default:
|
|
LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
|
|
shader_config.enable.Value(), shader_config.offset);
|
|
UNREACHABLE();
|
|
}
|
|
|
|
// Configure the const buffers for this shader stage.
|
|
current_constbuffer_bindpoint = SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage),
|
|
shader, current_constbuffer_bindpoint);
|
|
|
|
// Configure the textures for this shader stage.
|
|
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
|
|
current_texture_bindpoint);
|
|
|
|
// When VertexA is enabled, we have dual vertex shaders
|
|
if (program == Maxwell::ShaderProgram::VertexA) {
|
|
// VertexB was combined with VertexA, so we skip the VertexB iteration
|
|
index++;
|
|
}
|
|
}
|
|
|
|
state.Apply();
|
|
|
|
shader_program_manager->UseTrivialGeometryShader();
|
|
}
|
|
|
|
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
|
|
std::size_t size = 0;
|
|
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
|
if (!regs.vertex_array[index].IsEnabled())
|
|
continue;
|
|
|
|
const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
|
|
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
|
|
|
ASSERT(end > start);
|
|
size += end - start + 1;
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
// Records whether the pending draw is indexed or not, then performs it right away.
// Always returns true.
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
    if (is_indexed) {
        accelerate_draw = AccelDraw::Indexed;
    } else {
        accelerate_draw = AccelDraw::Arrays;
    }
    DrawArrays();
    return true;
}
|
|
|
|
template <typename Map, typename Interval>
|
|
static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
|
|
return boost::make_iterator_range(map.equal_range(interval));
|
|
}
|
|
|
|
// Adjusts the cached-reference count of every page overlapping [addr, addr + size) by delta and
// tells the memory system when a page range becomes cached (count goes from zero to delta) or
// stops being cached (count is about to drop to zero).
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
    const u64 first_page = addr >> Memory::PAGE_BITS;
    // One past the last touched page (rounded up to a page boundary)
    const u64 end_page = (addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS;

    // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
    // subtract after iterating
    const auto touched_pages = CachedPageMap::interval_type::right_open(first_page, end_page);
    const bool is_increment = delta > 0;
    const bool is_decrement = delta < 0;

    if (is_increment) {
        cached_pages.add({touched_pages, delta});
    }

    for (const auto& segment : RangeFromInterval(cached_pages, touched_pages)) {
        // Clip the stored segment to the range being updated
        const auto clipped = segment.first & touched_pages;
        const int reference_count = segment.second;

        const VAddr range_begin = boost::icl::first(clipped) << Memory::PAGE_BITS;
        const VAddr range_end = boost::icl::last_next(clipped) << Memory::PAGE_BITS;
        const u64 range_size = range_end - range_begin;

        if (is_increment && reference_count == delta) {
            // Count was zero before the add above: the range just became cached
            Memory::RasterizerMarkRegionCached(range_begin, range_size, true);
        } else if (is_decrement && reference_count == -delta) {
            // Count will be zero after the add below: the range stops being cached
            Memory::RasterizerMarkRegionCached(range_begin, range_size, false);
        } else {
            ASSERT(reference_count >= 0);
        }
    }

    if (is_decrement) {
        cached_pages.add({touched_pages, delta});
    }
}
|
|
|
|
void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
|
|
bool preserve_contents,
|
|
boost::optional<std::size_t> single_color_target) {
|
|
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
|
|
Surface depth_surface;
|
|
if (using_depth_fb) {
|
|
depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
|
|
}
|
|
|
|
// TODO(bunnei): Figure out how the below register works. According to envytools, this should be
|
|
// used to enable multiple render targets. However, it is left unset on all games that I have
|
|
// tested.
|
|
ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
|
|
|
|
// Bind the framebuffer surfaces
|
|
state.draw.draw_framebuffer = framebuffer.handle;
|
|
state.Apply();
|
|
|
|
if (using_color_fb) {
|
|
if (single_color_target) {
|
|
// Used when just a single color attachment is enabled, e.g. for clearing a color buffer
|
|
Surface color_surface =
|
|
res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
|
|
glFramebufferTexture2D(
|
|
GL_DRAW_FRAMEBUFFER,
|
|
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
|
|
color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
|
|
glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
|
|
} else {
|
|
// Multiple color attachments are enabled
|
|
std::array<GLenum, Maxwell::NumRenderTargets> buffers;
|
|
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
|
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
|
|
buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
|
|
glFramebufferTexture2D(
|
|
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
|
|
GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
|
|
0);
|
|
}
|
|
glDrawBuffers(regs.rt_control.count, buffers.data());
|
|
}
|
|
} else {
|
|
// No color attachments are enabled - zero out all of them
|
|
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
|
|
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
|
|
0, 0);
|
|
}
|
|
glDrawBuffer(GL_NONE);
|
|
}
|
|
|
|
if (depth_surface) {
|
|
if (regs.stencil_enable) {
|
|
// Attach both depth and stencil
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
|
depth_surface->Texture().handle, 0);
|
|
} else {
|
|
// Attach depth
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
|
|
depth_surface->Texture().handle, 0);
|
|
// Clear stencil attachment
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
|
}
|
|
} else {
|
|
// Clear both depth and stencil attachment
|
|
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
|
0);
|
|
}
|
|
|
|
SyncViewport();
|
|
|
|
state.Apply();
|
|
}
|
|
|
|
void RasterizerOpenGL::Clear() {
|
|
const auto prev_state{state};
|
|
SCOPE_EXIT({ prev_state.Apply(); });
|
|
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
bool use_color{};
|
|
bool use_depth{};
|
|
bool use_stencil{};
|
|
|
|
OpenGLState clear_state;
|
|
clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
|
|
clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
|
|
clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
|
|
clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
|
|
clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
|
|
|
|
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
|
|
regs.clear_buffers.A) {
|
|
use_color = true;
|
|
}
|
|
if (regs.clear_buffers.Z) {
|
|
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
|
|
use_depth = true;
|
|
|
|
// Always enable the depth write when clearing the depth buffer. The depth write mask is
|
|
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
|
|
clear_state.depth.test_enabled = true;
|
|
clear_state.depth.test_func = GL_ALWAYS;
|
|
}
|
|
if (regs.clear_buffers.S) {
|
|
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
|
|
use_stencil = true;
|
|
clear_state.stencil.test_enabled = true;
|
|
}
|
|
|
|
if (!use_color && !use_depth && !use_stencil) {
|
|
// No color surface nor depth/stencil surface are enabled
|
|
return;
|
|
}
|
|
|
|
ScopeAcquireGLContext acquire_context{emu_window};
|
|
|
|
ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
|
|
regs.clear_buffers.RT.Value());
|
|
|
|
clear_state.Apply();
|
|
|
|
if (use_color) {
|
|
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
|
}
|
|
|
|
if (use_depth && use_stencil) {
|
|
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
|
|
} else if (use_depth) {
|
|
glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth);
|
|
} else if (use_stencil) {
|
|
glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil);
|
|
}
|
|
}
|
|
|
|
void RasterizerOpenGL::DrawArrays() {
|
|
if (accelerate_draw == AccelDraw::Disabled)
|
|
return;
|
|
|
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
|
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
|
const auto& regs = gpu.regs;
|
|
|
|
ScopeAcquireGLContext acquire_context{emu_window};
|
|
|
|
ConfigureFramebuffers();
|
|
|
|
SyncDepthTestState();
|
|
SyncStencilTestState();
|
|
SyncBlendState();
|
|
SyncLogicOpState();
|
|
SyncCullMode();
|
|
|
|
// TODO(bunnei): Sync framebuffer_scale uniform here
|
|
// TODO(bunnei): Sync scissorbox uniform(s) here
|
|
|
|
// Draw the vertex batch
|
|
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
|
|
const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
|
|
static_cast<u64>(regs.index_array.FormatSizeInBytes())};
|
|
|
|
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
|
state.Apply();
|
|
|
|
std::size_t buffer_size = CalculateVertexArraysSize();
|
|
|
|
if (is_indexed) {
|
|
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
|
|
}
|
|
|
|
// Uniform space for the 5 shader stages
|
|
buffer_size =
|
|
Common::AlignUp<std::size_t>(buffer_size, 4) +
|
|
(sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
|
|
|
|
// Add space for at least 18 constant buffers
|
|
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
|
|
|
|
buffer_cache.Map(buffer_size);
|
|
|
|
SetupVertexArrays();
|
|
|
|
// If indexed mode, copy the index buffer
|
|
GLintptr index_buffer_offset = 0;
|
|
if (is_indexed) {
|
|
MICROPROFILE_SCOPE(OpenGL_Index);
|
|
|
|
// Adjust the index buffer offset so it points to the first desired index.
|
|
auto index_start = regs.index_array.StartAddress();
|
|
index_start += static_cast<size_t>(regs.index_array.first) *
|
|
static_cast<size_t>(regs.index_array.FormatSizeInBytes());
|
|
|
|
index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
|
|
}
|
|
|
|
SetupShaders();
|
|
|
|
buffer_cache.Unmap();
|
|
|
|
shader_program_manager->ApplyTo(state);
|
|
state.Apply();
|
|
|
|
const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
|
|
if (is_indexed) {
|
|
const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
|
|
|
|
if (gpu.state.current_instance > 0) {
|
|
glDrawElementsInstancedBaseVertexBaseInstance(
|
|
primitive_mode, regs.index_array.count,
|
|
MaxwellToGL::IndexFormat(regs.index_array.format),
|
|
reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex,
|
|
gpu.state.current_instance);
|
|
} else {
|
|
glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
|
|
MaxwellToGL::IndexFormat(regs.index_array.format),
|
|
reinterpret_cast<const void*>(index_buffer_offset),
|
|
base_vertex);
|
|
}
|
|
} else {
|
|
if (gpu.state.current_instance > 0) {
|
|
glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first,
|
|
regs.vertex_buffer.count, 1,
|
|
gpu.state.current_instance);
|
|
} else {
|
|
glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
|
|
}
|
|
}
|
|
|
|
// Disable scissor test
|
|
state.scissor.enabled = false;
|
|
|
|
accelerate_draw = AccelDraw::Disabled;
|
|
|
|
// Unbind textures for potential future use as framebuffer attachments
|
|
for (auto& texture_unit : state.texture_units) {
|
|
texture_unit.Unbind();
|
|
}
|
|
state.Apply();
|
|
}
|
|
|
|
// Intentionally a no-op in this implementation.
void RasterizerOpenGL::FlushAll() {
}
|
|
|
|
// Intentionally a no-op in this implementation; both arguments are ignored.
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
}
|
|
|
|
// Forwards the invalidation of [addr, addr + size) to the surface, shader and buffer caches,
// in that order.
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);

    // Surfaces first, then shaders, then stream buffer contents
    res_cache.InvalidateRegion(addr, size);
    shader_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
}
|
|
|
|
// Only invalidates the region; no flush is performed here.
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    InvalidateRegion(addr, size);
}
|
|
|
|
// Not implemented: reaching this function is treated as a programming error. The config
// argument is ignored.
bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
    MICROPROFILE_SCOPE(OpenGL_Blits);

    UNREACHABLE();

    return true;
}
|
|
|
|
// Not implemented: reaching this function is treated as a programming error. The config
// argument is ignored.
bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) {
    UNREACHABLE();

    return true;
}
|
|
|
|
// Not implemented: reaching this function is treated as a programming error. The config
// argument is ignored.
bool RasterizerOpenGL::AccelerateFill(const void* config) {
    UNREACHABLE();

    return true;
}
|
|
|
|
// Tries to present a framebuffer straight from the surface cache.
//
// Returns false when framebuffer_addr is zero or no cached surface is found for it. Otherwise
// the surface's texture becomes the displayed texture and true is returned. pixel_stride is not
// used.
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                         VAddr framebuffer_addr, u32 pixel_stride) {
    if (!framebuffer_addr) {
        return false;
    }

    MICROPROFILE_SCOPE(OpenGL_CacheManagement);

    const auto& cached_surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
    if (!cached_surface) {
        return false;
    }

    // Verify that the cached surface is the same size and format as the requested framebuffer
    const auto& surface_params{cached_surface->GetSurfaceParams()};
    const auto& expected_format{
        SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
    ASSERT_MSG(surface_params.width == config.width, "Framebuffer width is different");
    ASSERT_MSG(surface_params.height == config.height, "Framebuffer height is different");
    ASSERT_MSG(surface_params.pixel_format == expected_format,
               "Framebuffer pixel_format is different");

    screen_info.display_texture = cached_surface->Texture().handle;

    return true;
}
|
|
|
|
// Creates the OpenGL sampler object and resets the cached filter/wrap settings to the values
// the new sampler object is configured with.
void RasterizerOpenGL::SamplerInfo::Create() {
    sampler.Create();

    min_filter = Tegra::Texture::TextureFilter::Linear;
    mag_filter = min_filter;

    wrap_p = Tegra::Texture::WrapMode::Wrap;
    wrap_v = wrap_p;
    wrap_u = wrap_v;

    // default is GL_LINEAR_MIPMAP_LINEAR
    glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    // Other attributes have correct defaults
}
|
|
|
|
// Brings the OpenGL sampler object in line with the guest sampler entry. Each parameter is only
// sent to OpenGL when it differs from the cached copy, and the cache is updated alongside.
void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
    const GLuint sampler_handle = sampler.handle;

    // Filtering
    if (mag_filter != config.mag_filter) {
        mag_filter = config.mag_filter;
        glSamplerParameteri(sampler_handle, GL_TEXTURE_MAG_FILTER,
                            MaxwellToGL::TextureFilterMode(mag_filter));
    }
    if (min_filter != config.min_filter) {
        min_filter = config.min_filter;
        glSamplerParameteri(sampler_handle, GL_TEXTURE_MIN_FILTER,
                            MaxwellToGL::TextureFilterMode(min_filter));
    }

    // Wrapping: u/v/p map to the S/T/R texture coordinates
    if (wrap_u != config.wrap_u) {
        wrap_u = config.wrap_u;
        glSamplerParameteri(sampler_handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
    }
    if (wrap_v != config.wrap_v) {
        wrap_v = config.wrap_v;
        glSamplerParameteri(sampler_handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
    }
    if (wrap_p != config.wrap_p) {
        wrap_p = config.wrap_p;
        glSamplerParameteri(sampler_handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
    }

    // The border color only matters if at least one axis uses border wrapping
    const bool uses_border = wrap_u == Tegra::Texture::WrapMode::Border ||
                             wrap_v == Tegra::Texture::WrapMode::Border ||
                             wrap_p == Tegra::Texture::WrapMode::Border;
    if (!uses_border) {
        return;
    }

    const GLvec4 new_border_color = {{config.border_color_r, config.border_color_g,
                                      config.border_color_b, config.border_color_a}};
    if (border_color != new_border_color) {
        border_color = new_border_color;
        glSamplerParameterfv(sampler_handle, GL_TEXTURE_BORDER_COLOR, border_color.data());
    }
}
|
|
|
|
// Uploads the const buffers used by `shader` for the given stage into the buffer cache and binds
// them to consecutive uniform buffer binding points starting at current_bindpoint.
//
// Returns the first binding point that is free after this stage's buffers.
u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
                                        u32 current_bindpoint) {
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& gpu = Core::System::GetInstance().GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
    const auto& stage_state = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
    const auto& used_buffers = shader->GetShaderEntries().const_buffer_entries;

    // Parallel arrays consumed by the single multi-bind call at the end
    constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
    std::array<GLuint, max_binds> bind_buffers;
    std::array<GLintptr, max_binds> bind_offsets;
    std::array<GLsizeiptr, max_binds> bind_sizes;

    ASSERT_MSG(used_buffers.size() <= max_binds, "Exceeded expected number of binding points.");

    // Upload only the enabled buffers from the 16 constbuffers of each shader stage
    for (u32 slot = 0; slot < used_buffers.size(); ++slot) {
        const auto& entry = used_buffers[slot];
        const auto& guest_buffer = stage_state.const_buffers[entry.GetIndex()];

        if (!guest_buffer.enabled) {
            // With disabled buffers set values as zero to unbind them
            bind_buffers[slot] = 0;
            bind_offsets[slot] = 0;
            bind_sizes[slot] = 0;
            continue;
        }

        std::size_t upload_size = 0;

        if (!entry.IsIndirect()) {
            // Buffer is accessed directly, upload just what we use
            upload_size = entry.GetSize() * sizeof(float);
        } else {
            // Buffer is accessed indirectly, so upload the entire thing
            upload_size = guest_buffer.size;

            if (upload_size > MaxConstbufferSize) {
                LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}",
                             upload_size, MaxConstbufferSize);
                upload_size = MaxConstbufferSize;
            }
        }

        // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
        // UBO alignment requirements.
        upload_size = Common::AlignUp(upload_size, sizeof(GLvec4));
        ASSERT_MSG(upload_size <= MaxConstbufferSize, "Constbuffer too big");

        const GLintptr upload_offset =
            buffer_cache.UploadMemory(guest_buffer.address, upload_size,
                                      static_cast<std::size_t>(uniform_buffer_alignment));

        // Now configure the bindpoint of the buffer inside the shader
        glUniformBlockBinding(shader->GetProgramHandle(), shader->GetProgramResourceIndex(entry),
                              current_bindpoint + slot);

        // Prepare values for multibind
        bind_buffers[slot] = buffer_cache.GetHandle();
        bind_offsets[slot] = upload_offset;
        bind_sizes[slot] = upload_size;
    }

    glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint,
                       static_cast<GLsizei>(used_buffers.size()), bind_buffers.data(),
                       bind_offsets.data(), bind_sizes.data());

    return current_bindpoint + static_cast<u32>(used_buffers.size());
}
|
|
|
|
// Binds the textures sampled by `shader` to consecutive texture units starting at current_unit,
// syncing each unit's sampler object and swizzle with the guest texture configuration.
//
// Returns the first texture unit that is free after this shader's samplers.
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    const auto& gpu = Core::System::GetInstance().GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
    const auto& sampler_entries = shader->GetShaderEntries().texture_samplers;

    ASSERT_MSG(current_unit + sampler_entries.size() <= std::size(state.texture_units),
               "Exceeded the number of active textures.");

    for (u32 slot = 0; slot < sampler_entries.size(); ++slot) {
        const auto& entry = sampler_entries[slot];
        const u32 unit_index = current_unit + slot;
        auto& unit = state.texture_units[unit_index];

        // Bind the uniform to the sampler.
        glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry),
                           unit_index);

        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());

        if (!texture.enabled) {
            unit.texture = 0;
            continue;
        }

        texture_samplers[unit_index].SyncWithConfig(texture.tsc);
        Surface surface = res_cache.GetTextureSurface(texture);
        if (surface == nullptr) {
            // Can occur when texture addr is null or its memory is unmapped/invalid
            unit.texture = 0;
            continue;
        }

        unit.texture = surface->Texture().handle;
        unit.target = surface->Target();
        unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
        unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
        unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
        unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
    }

    return current_unit + static_cast<u32>(sampler_entries.size());
}
|
|
|
|
void RasterizerOpenGL::SyncViewport() {
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
|
|
|
|
state.viewport.x = viewport_rect.left;
|
|
state.viewport.y = viewport_rect.bottom;
|
|
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
|
|
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
|
|
}
|
|
|
|
// Not implemented: reaching this function is treated as a programming error.
void RasterizerOpenGL::SyncClipEnabled() {
    UNREACHABLE();
}
|
|
|
|
// Not implemented: reaching this function is treated as a programming error.
void RasterizerOpenGL::SyncClipCoef() {
    UNREACHABLE();
}
|
|
|
|
void RasterizerOpenGL::SyncCullMode() {
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
|
|
state.cull.enabled = regs.cull.enabled != 0;
|
|
|
|
if (state.cull.enabled) {
|
|
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
|
|
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
|
|
|
|
const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
|
|
regs.viewport_transform[0].scale_y < 0.0f};
|
|
|
|
// If the GPU is configured to flip the rasterized triangles, then we need to flip the
|
|
// notion of front and back. Note: We flip the triangles when the value of the register is 0
|
|
// because OpenGL already does it for us.
|
|
if (flip_triangles) {
|
|
if (state.cull.front_face == GL_CCW)
|
|
state.cull.front_face = GL_CW;
|
|
else if (state.cull.front_face == GL_CW)
|
|
state.cull.front_face = GL_CCW;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Not implemented: reaching this function is treated as a programming error.
void RasterizerOpenGL::SyncDepthScale() {
    UNREACHABLE();
}
|
|
|
|
// Not implemented: reaching this function is treated as a programming error.
void RasterizerOpenGL::SyncDepthOffset() {
    UNREACHABLE();
}
|
|
|
|
void RasterizerOpenGL::SyncDepthTestState() {
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
|
|
state.depth.test_enabled = regs.depth_test_enable != 0;
|
|
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
|
|
|
|
if (!state.depth.test_enabled)
|
|
return;
|
|
|
|
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
|
|
}
|
|
|
|
void RasterizerOpenGL::SyncStencilTestState() {
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
state.stencil.test_enabled = regs.stencil_enable != 0;
|
|
|
|
if (!regs.stencil_enable) {
|
|
return;
|
|
}
|
|
|
|
// TODO(bunnei): Verify behavior when this is not set
|
|
ASSERT(regs.stencil_two_side_enable);
|
|
|
|
state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
|
|
state.stencil.front.test_ref = regs.stencil_front_func_ref;
|
|
state.stencil.front.test_mask = regs.stencil_front_func_mask;
|
|
state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail);
|
|
state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail);
|
|
state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass);
|
|
state.stencil.front.write_mask = regs.stencil_front_mask;
|
|
|
|
state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
|
|
state.stencil.back.test_ref = regs.stencil_back_func_ref;
|
|
state.stencil.back.test_mask = regs.stencil_back_func_mask;
|
|
state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
|
|
state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
|
|
state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
|
|
state.stencil.back.write_mask = regs.stencil_back_mask;
|
|
}
|
|
|
|
void RasterizerOpenGL::SyncBlendState() {
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
|
|
// TODO(Subv): Support more than just render target 0.
|
|
state.blend.enabled = regs.blend.enable[0] != 0;
|
|
|
|
if (!state.blend.enabled)
|
|
return;
|
|
|
|
ASSERT_MSG(regs.logic_op.enable == 0,
|
|
"Blending and logic op can't be enabled at the same time.");
|
|
|
|
ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
|
|
ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
|
|
state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
|
|
state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
|
|
state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
|
|
state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
|
|
state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
|
|
state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
|
|
}
|
|
|
|
void RasterizerOpenGL::SyncLogicOpState() {
|
|
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
|
|
|
// TODO(Subv): Support more than just render target 0.
|
|
state.logic_op.enabled = regs.logic_op.enable != 0;
|
|
|
|
if (!state.logic_op.enabled)
|
|
return;
|
|
|
|
ASSERT_MSG(regs.blend.enable[0] == 0,
|
|
"Blending and logic op can't be enabled at the same time.");
|
|
|
|
state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation);
|
|
}
|
|
|
|
} // namespace OpenGL
|