forked from suyu/suyu
gl_shader_cache: Remove dynamic BaseBinding specialization
This commit is contained in:
parent
c8a48aacc0
commit
180417c514
16 changed files with 200 additions and 192 deletions
|
@ -63,7 +63,6 @@ public:
|
|||
static constexpr std::size_t NumVertexArrays = 32;
|
||||
static constexpr std::size_t NumVertexAttributes = 32;
|
||||
static constexpr std::size_t NumVaryings = 31;
|
||||
static constexpr std::size_t NumTextureSamplers = 32;
|
||||
static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
|
||||
static constexpr std::size_t NumClipDistances = 8;
|
||||
static constexpr std::size_t MaxShaderProgram = 6;
|
||||
|
|
|
@ -16,5 +16,6 @@ enum class ShaderType : u32 {
|
|||
Fragment = 4,
|
||||
Compute = 5,
|
||||
};
|
||||
static constexpr std::size_t MaxShaderTypes = 6;
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -17,6 +17,9 @@ namespace OpenGL {
|
|||
|
||||
namespace {
|
||||
|
||||
// One uniform block is reserved for emulation purposes
|
||||
constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
GLint temporary;
|
||||
|
@ -48,6 +51,22 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
|
|||
return std::find(images.begin(), images.end(), extension) != images.end();
|
||||
}
|
||||
|
||||
/// Adds two sets of base bindings member by member.
/// Uniform buffers, shader storage buffers, samplers and images are summed independently.
constexpr Device::BaseBindings operator+(Device::BaseBindings lhs, Device::BaseBindings rhs) {
    // lhs is received by value, so it doubles as the accumulator for the result.
    lhs.uniform_buffer += rhs.uniform_buffer;
    lhs.shader_storage_buffer += rhs.shader_storage_buffer;
    lhs.sampler += rhs.sampler;
    lhs.image += rhs.image;
    return lhs;
}
|
||||
|
||||
/// Builds a BaseBindings from four integer queries, one per resource kind.
/// @param uniform_blocks        Query name used for the uniform buffer count.
/// @param shader_storage_blocks Query name used for the shader storage buffer count.
/// @param texture_image_units   Query name used for the sampler count.
/// @param image_uniforms        Query name used for the image count.
/// @return The queried values, with ReservedUniformBlocks subtracted from the uniform
///         buffer count.
/// @note The subtraction is done in u32 arithmetic, so the queried uniform block count is
///       expected to be at least ReservedUniformBlocks.
/// NOTE(review): GetInteger presumably wraps glGetIntegerv - confirm against its definition.
Device::BaseBindings BuildBaseBindings(GLenum uniform_blocks, GLenum shader_storage_blocks,
                                       GLenum texture_image_units, GLenum image_uniforms) noexcept {
    // Queries are issued in the same order as the members of BaseBindings.
    const u32 num_uniform_buffers = GetInteger<u32>(uniform_blocks) - ReservedUniformBlocks;
    const u32 num_shader_storage_buffers = GetInteger<u32>(shader_storage_blocks);
    const u32 num_samplers = GetInteger<u32>(texture_image_units);
    const u32 num_images = GetInteger<u32>(image_uniforms);

    return Device::BaseBindings{num_uniform_buffers, num_shader_storage_buffers, num_samplers,
                                num_images};
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Device::Device() {
|
||||
|
@ -56,6 +75,29 @@ Device::Device() {
|
|||
|
||||
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
||||
|
||||
// Reserve the first UBO for emulation bindings
|
||||
base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0};
|
||||
base_bindings[1] = base_bindings[0] + BuildBaseBindings(GL_MAX_VERTEX_UNIFORM_BLOCKS,
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS);
|
||||
base_bindings[2] =
|
||||
base_bindings[1] + BuildBaseBindings(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS);
|
||||
base_bindings[3] =
|
||||
base_bindings[2] + BuildBaseBindings(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS);
|
||||
base_bindings[4] = base_bindings[3] + BuildBaseBindings(GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_IMAGE_UNIFORMS);
|
||||
// Compute doesn't need any of that
|
||||
base_bindings[5] = BaseBindings{0, 0, 0, 0};
|
||||
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
|
|
|
@ -6,14 +6,32 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device {
|
||||
static constexpr u32 EmulationUniformBlockBinding = 0;
|
||||
|
||||
class Device final {
|
||||
public:
|
||||
struct BaseBindings final {
|
||||
u32 uniform_buffer{};
|
||||
u32 shader_storage_buffer{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
};
|
||||
|
||||
explicit Device();
|
||||
explicit Device(std::nullptr_t);
|
||||
|
||||
/// Returns the base bindings stored for a stage index.
/// @param stage_index Index into base_bindings; it is not range-checked here.
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
    const BaseBindings& stage_bindings = base_bindings[stage_index];
    return stage_bindings;
}
|
||||
|
||||
/// Returns the base bindings stored for a shader type.
/// The enumerator's numeric value is used directly as the stage index.
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
    const auto stage_index = static_cast<std::size_t>(shader_type);
    return GetBaseBindings(stage_index);
}
|
||||
|
||||
/// Returns the stored uniform buffer offset alignment.
std::size_t GetUniformBufferAlignment() const {
    return this->uniform_buffer_alignment;
}
|
||||
|
@ -67,6 +85,7 @@ private:
|
|||
static bool TestComponentIndexingBug();
|
||||
static bool TestPreciseBug();
|
||||
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
std::size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
|
|
|
@ -258,7 +258,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||
auto& gpu = system.GPU().Maxwell3D();
|
||||
|
||||
BaseBindings base_bindings;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
|
@ -277,25 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
continue;
|
||||
}
|
||||
|
||||
GLShader::MaxwellUniformData ubo{};
|
||||
ubo.SetFromRegs(gpu);
|
||||
const auto [buffer, offset] =
|
||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||
|
||||
// Bind the emulation info buffer
|
||||
bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
|
||||
Shader shader{shader_cache.GetStageProgram(program)};
|
||||
|
||||
// Stage indices are 0 - 5
|
||||
const std::size_t stage = index == 0 ? 0 : index - 1;
|
||||
SetupDrawConstBuffers(stage, shader);
|
||||
SetupDrawGlobalMemory(stage, shader);
|
||||
SetupDrawTextures(stage, shader, base_bindings);
|
||||
SetupDrawImages(stage, shader, base_bindings);
|
||||
SetupDrawTextures(stage, shader);
|
||||
SetupDrawImages(stage, shader);
|
||||
|
||||
const ProgramVariant variant(base_bindings, primitive_mode);
|
||||
const auto [program_handle, next_bindings] = shader->GetHandle(variant);
|
||||
const ProgramVariant variant(primitive_mode);
|
||||
const auto program_handle = shader->GetHandle(variant);
|
||||
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
|
@ -326,8 +317,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
// VertexB was combined with VertexA, so we skip the VertexB iteration
|
||||
++index;
|
||||
}
|
||||
|
||||
base_bindings = next_bindings;
|
||||
}
|
||||
|
||||
SyncClipEnabled(clip_distances);
|
||||
|
@ -612,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() {
|
|||
index_buffer_offset = SetupIndexBuffer();
|
||||
|
||||
// Prepare packed bindings.
|
||||
bind_ubo_pushbuffer.Setup(0);
|
||||
bind_ssbo_pushbuffer.Setup(0);
|
||||
bind_ubo_pushbuffer.Setup();
|
||||
bind_ssbo_pushbuffer.Setup();
|
||||
|
||||
// Setup emulation uniform buffer.
|
||||
GLShader::MaxwellUniformData ubo;
|
||||
ubo.SetFromRegs(gpu);
|
||||
const auto [buffer, offset] =
|
||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||
bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset,
|
||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
texture_cache.GuardSamplers(true);
|
||||
|
@ -754,7 +751,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
|
||||
launch_desc.block_dim_z, launch_desc.shared_alloc,
|
||||
launch_desc.local_pos_alloc);
|
||||
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
|
||||
state.draw.shader_program = kernel->GetHandle(variant);
|
||||
state.draw.program_pipeline = 0;
|
||||
|
||||
const std::size_t buffer_size =
|
||||
|
@ -762,8 +759,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
bind_ubo_pushbuffer.Setup(0);
|
||||
bind_ssbo_pushbuffer.Setup(0);
|
||||
bind_ubo_pushbuffer.Setup();
|
||||
bind_ssbo_pushbuffer.Setup();
|
||||
|
||||
SetupComputeConstBuffers(kernel);
|
||||
SetupComputeGlobalMemory(kernel);
|
||||
|
@ -847,7 +844,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
|
||||
|
||||
if (params.pixel_format != pixel_format) {
|
||||
LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
|
||||
LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
|
||||
}
|
||||
|
||||
screen_info.display_texture = surface->GetTexture();
|
||||
|
@ -858,17 +855,21 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer;
|
||||
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
||||
const auto& shader_stage = stages[stage_index];
|
||||
|
||||
for (const auto& entry : shader->GetShaderEntries().const_buffers) {
|
||||
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
|
||||
SetupConstBuffer(buffer, entry);
|
||||
SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
|
||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
||||
|
@ -876,15 +877,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
|||
buffer.address = config.Address();
|
||||
buffer.size = config.size;
|
||||
buffer.enabled = mask[entry.GetIndex()];
|
||||
SetupConstBuffer(buffer, entry);
|
||||
SetupConstBuffer(binding++, buffer, entry);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const GLShader::ConstBufferEntry& entry) {
|
||||
if (!buffer.enabled) {
|
||||
// Set values to zero to unbind buffers
|
||||
bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
|
||||
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
|
||||
sizeof(float));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -895,18 +897,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
|
|||
const auto alignment = device.GetUniformBufferAlignment();
|
||||
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
|
||||
device.HasFastBufferSubData());
|
||||
bind_ubo_pushbuffer.Push(cbuf, offset, size);
|
||||
bind_ubo_pushbuffer.Push(binding, cbuf, offset, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
|
||||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
||||
|
||||
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
|
||||
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
|
||||
const auto gpu_addr{memory_manager.Read<u64>(addr)};
|
||||
const auto size{memory_manager.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(entry, gpu_addr, size);
|
||||
SetupGlobalMemory(binding++, entry, gpu_addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -914,38 +918,35 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
|
|||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
|
||||
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
|
||||
const auto gpu_addr{memory_manager.Read<u64>(addr)};
|
||||
const auto size{memory_manager.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(entry, gpu_addr, size);
|
||||
SetupGlobalMemory(binding++, entry, gpu_addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
|
||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
|
||||
GPUVAddr gpu_addr, std::size_t size) {
|
||||
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
||||
const auto [ssbo, buffer_offset] =
|
||||
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
|
||||
bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
|
||||
bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader,
|
||||
BaseBindings base_bindings) {
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
const auto& gpu = system.GPU();
|
||||
const auto& maxwell3d = gpu.Maxwell3D();
|
||||
const auto& entries = shader->GetShaderEntries().samplers;
|
||||
|
||||
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures),
|
||||
"Exceeded the number of active textures.");
|
||||
|
||||
const auto num_entries = static_cast<u32>(entries.size());
|
||||
for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) {
|
||||
const auto& entry = entries[bindpoint];
|
||||
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
||||
for (const auto& entry : entries) {
|
||||
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
|
||||
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
|
||||
SetupTexture(base_bindings.sampler + bindpoint, texture, entry);
|
||||
SetupTexture(binding++, texture, entry);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -954,14 +955,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
|
|||
const auto& compute = system.GPU().KeplerCompute();
|
||||
const auto& entries = kernel->GetShaderEntries().samplers;
|
||||
|
||||
ASSERT_MSG(entries.size() <= std::size(state.textures),
|
||||
"Exceeded the number of active textures.");
|
||||
|
||||
const auto num_entries = static_cast<u32>(entries.size());
|
||||
for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) {
|
||||
const auto& entry = entries[bindpoint];
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : entries) {
|
||||
const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
|
||||
SetupTexture(bindpoint, texture, entry);
|
||||
SetupTexture(binding++, texture, entry);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -986,8 +983,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
|
|||
texture.tic.w_source);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader,
|
||||
BaseBindings base_bindings) {
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
|
||||
const auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
const auto& entries = shader->GetShaderEntries().images;
|
||||
|
||||
|
@ -996,7 +992,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
|
|||
const auto& entry = entries[bindpoint];
|
||||
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
|
||||
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
|
||||
SetupImage(base_bindings.image + bindpoint, tic, entry);
|
||||
SetupImage(bindpoint, tic, entry);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -89,7 +89,7 @@ private:
|
|||
void SetupComputeConstBuffers(const Shader& kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const GLShader::ConstBufferEntry& entry);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
|
@ -99,15 +99,14 @@ private:
|
|||
void SetupComputeGlobalMemory(const Shader& kernel);
|
||||
|
||||
/// Configures a global memory buffer.
|
||||
void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
std::size_t size);
|
||||
|
||||
/// Syncs all the state, shaders, render targets and textures setting before a draw call.
|
||||
void DrawPrelude();
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(std::size_t stage_index, const Shader& shader,
|
||||
BaseBindings base_bindings);
|
||||
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
|
||||
|
||||
/// Configures the textures used in a compute shader.
|
||||
void SetupComputeTextures(const Shader& kernel);
|
||||
|
@ -117,7 +116,7 @@ private:
|
|||
const GLShader::SamplerEntry& entry);
|
||||
|
||||
/// Configures images in a graphics shader.
|
||||
void SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings);
|
||||
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
|
||||
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(const Shader& shader);
|
||||
|
|
|
@ -266,28 +266,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
|
|||
}
|
||||
source += '\n';
|
||||
|
||||
auto base_bindings = variant.base_bindings;
|
||||
if (!is_compute) {
|
||||
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
|
||||
}
|
||||
|
||||
for (const auto& cbuf : entries.const_buffers) {
|
||||
source +=
|
||||
fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
|
||||
}
|
||||
for (const auto& gmem : entries.global_memory_entries) {
|
||||
source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
|
||||
gmem.GetCbufOffset(), base_bindings.gmem++);
|
||||
}
|
||||
for (const auto& sampler : entries.samplers) {
|
||||
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
|
||||
base_bindings.sampler++);
|
||||
}
|
||||
for (const auto& image : entries.images) {
|
||||
source +=
|
||||
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
|
||||
}
|
||||
|
||||
if (shader_type == ShaderType::Geometry) {
|
||||
const auto [glsl_topology, debug_name, max_vertices] =
|
||||
GetPrimitiveDescription(variant.primitive_mode);
|
||||
|
@ -403,27 +381,21 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params,
|
|||
unspecialized.code_b));
|
||||
}
|
||||
|
||||
std::tuple<GLuint, BaseBindings> CachedShader::GetHandle(const ProgramVariant& variant) {
|
||||
GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
|
||||
EnsureValidLockerVariant();
|
||||
|
||||
const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
|
||||
auto& program = entry->second;
|
||||
if (is_cache_miss) {
|
||||
program = BuildShader(device, unique_identifier, shader_type, code, code_b,
|
||||
*curr_locker_variant->locker, variant);
|
||||
disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
|
||||
|
||||
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
|
||||
if (!is_cache_miss) {
|
||||
return program->handle;
|
||||
}
|
||||
|
||||
auto base_bindings = variant.base_bindings;
|
||||
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
|
||||
base_bindings.cbuf += STAGE_RESERVED_UBOS;
|
||||
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
|
||||
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
|
||||
base_bindings.image += static_cast<u32>(entries.images.size());
|
||||
program = BuildShader(device, unique_identifier, shader_type, code, code_b,
|
||||
*curr_locker_variant->locker, variant);
|
||||
disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
|
||||
|
||||
return {program->handle, base_bindings};
|
||||
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
|
||||
return program->handle;
|
||||
}
|
||||
|
||||
bool CachedShader::EnsureValidLockerVariant() {
|
||||
|
|
|
@ -87,7 +87,7 @@ public:
|
|||
}
|
||||
|
||||
/// Gets the GL program handle for the shader
|
||||
std::tuple<GLuint, BaseBindings> GetHandle(const ProgramVariant& variant);
|
||||
GLuint GetHandle(const ProgramVariant& variant);
|
||||
|
||||
private:
|
||||
struct LockerVariant {
|
||||
|
|
|
@ -43,6 +43,9 @@ using namespace VideoCommon::Shader;
|
|||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using Operation = const OperationNode&;
|
||||
|
||||
class ASTDecompiler;
|
||||
class ExprDecompiler;
|
||||
|
||||
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||
|
||||
struct TextureAoffi {};
|
||||
|
@ -337,9 +340,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
|||
return stage == ShaderType::Vertex;
|
||||
}
|
||||
|
||||
class ASTDecompiler;
|
||||
class ExprDecompiler;
|
||||
|
||||
class GLSLDecompiler final {
|
||||
public:
|
||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
|
||||
|
@ -621,7 +621,8 @@ private:
|
|||
void DeclareConstantBuffers() {
|
||||
for (const auto& entry : ir.GetConstantBuffers()) {
|
||||
const auto [index, size] = entry;
|
||||
code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
|
||||
const u32 binding = device.GetBaseBindings(stage).uniform_buffer + index;
|
||||
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding,
|
||||
GetConstBufferBlock(index));
|
||||
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
|
||||
code.AddLine("}};");
|
||||
|
@ -630,6 +631,8 @@ private:
|
|||
}
|
||||
|
||||
void DeclareGlobalMemory() {
|
||||
u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
|
||||
|
||||
for (const auto& gmem : ir.GetGlobalMemory()) {
|
||||
const auto& [base, usage] = gmem;
|
||||
|
||||
|
@ -642,8 +645,8 @@ private:
|
|||
qualifier += " writeonly";
|
||||
}
|
||||
|
||||
code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
|
||||
base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
|
||||
code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
|
||||
GetGlobalMemoryBlock(base));
|
||||
code.AddLine(" uint {}[];", GetGlobalMemory(base));
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
|
@ -653,9 +656,11 @@ private:
|
|||
void DeclareSamplers() {
|
||||
const auto& samplers = ir.GetSamplers();
|
||||
for (const auto& sampler : samplers) {
|
||||
const std::string name{GetSampler(sampler)};
|
||||
const std::string description{"layout (binding = SAMPLER_BINDING_" +
|
||||
std::to_string(sampler.GetIndex()) + ") uniform"};
|
||||
const std::string name = GetSampler(sampler);
|
||||
|
||||
const u32 binding = device.GetBaseBindings(stage).sampler + sampler.GetIndex();
|
||||
const std::string description = fmt::format("layout (binding = {}) uniform", binding);
|
||||
|
||||
std::string sampler_type = [&]() {
|
||||
if (sampler.IsBuffer()) {
|
||||
return "samplerBuffer";
|
||||
|
@ -732,10 +737,12 @@ private:
|
|||
qualifier += " writeonly";
|
||||
}
|
||||
|
||||
const u32 binding = device.GetBaseBindings(stage).image + image.GetIndex();
|
||||
|
||||
const char* format = image.IsAtomic() ? "r32ui, " : "";
|
||||
const char* type_declaration = GetImageTypeDeclaration(image.GetType());
|
||||
code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format,
|
||||
image.GetIndex(), qualifier, type_declaration, GetImage(image));
|
||||
code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding,
|
||||
qualifier, type_declaration, GetImage(image));
|
||||
}
|
||||
if (!images.empty()) {
|
||||
code.AddNewLine();
|
||||
|
|
|
@ -53,11 +53,10 @@ struct BindlessSamplerKey {
|
|||
Tegra::Engines::SamplerDescriptor sampler{};
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 10;
|
||||
constexpr u32 NativeVersion = 11;
|
||||
|
||||
// Make sure sizes don't change by accident
|
||||
static_assert(sizeof(BaseBindings) == 16);
|
||||
static_assert(sizeof(ProgramVariant) == 36);
|
||||
static_assert(sizeof(ProgramVariant) == 20);
|
||||
|
||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||
ShaderCacheVersionHash hash{};
|
||||
|
|
|
@ -38,31 +38,13 @@ struct ShaderDiskCacheDump;
|
|||
using ProgramCode = std::vector<u64>;
|
||||
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
|
||||
|
||||
/// Allocated bindings used by an OpenGL shader program
|
||||
struct BaseBindings {
|
||||
u32 cbuf{};
|
||||
u32 gmem{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
|
||||
bool operator==(const BaseBindings& rhs) const noexcept {
|
||||
return std::tie(cbuf, gmem, sampler, image) ==
|
||||
std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
|
||||
}
|
||||
|
||||
bool operator!=(const BaseBindings& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<BaseBindings>);
|
||||
|
||||
/// Describes the different variants a program can be compiled with.
|
||||
struct ProgramVariant final {
|
||||
ProgramVariant() = default;
|
||||
|
||||
/// Graphics constructor.
|
||||
explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept
|
||||
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
|
||||
explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
|
||||
: primitive_mode{primitive_mode} {}
|
||||
|
||||
/// Compute constructor.
|
||||
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
|
||||
|
@ -71,7 +53,6 @@ struct ProgramVariant final {
|
|||
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
|
||||
|
||||
// Graphics specific parameters.
|
||||
BaseBindings base_bindings{};
|
||||
GLenum primitive_mode{};
|
||||
|
||||
// Compute specific parameters.
|
||||
|
@ -82,10 +63,10 @@ struct ProgramVariant final {
|
|||
u32 local_memory_size{};
|
||||
|
||||
bool operator==(const ProgramVariant& rhs) const noexcept {
|
||||
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
|
||||
shared_memory_size, local_memory_size) ==
|
||||
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
|
||||
rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
|
||||
return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
|
||||
local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
|
||||
rhs.block_z, rhs.shared_memory_size,
|
||||
rhs.local_memory_size);
|
||||
}
|
||||
|
||||
bool operator!=(const ProgramVariant& rhs) const noexcept {
|
||||
|
@ -117,21 +98,10 @@ struct ShaderDiskCacheUsage {
|
|||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::BaseBindings> {
|
||||
std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
|
||||
return static_cast<std::size_t>(bindings.cbuf) ^
|
||||
(static_cast<std::size_t>(bindings.gmem) << 8) ^
|
||||
(static_cast<std::size_t>(bindings.sampler) << 16) ^
|
||||
(static_cast<std::size_t>(bindings.image) << 24);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::ProgramVariant> {
|
||||
std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
|
||||
return std::hash<OpenGL::BaseBindings>{}(variant.base_bindings) ^
|
||||
(static_cast<std::size_t>(variant.primitive_mode) << 6) ^
|
||||
return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
|
||||
static_cast<std::size_t>(variant.block_x) ^
|
||||
(static_cast<std::size_t>(variant.block_y) << 32) ^
|
||||
(static_cast<std::size_t>(variant.block_z) << 48) ^
|
||||
|
|
|
@ -2,9 +2,13 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
@ -20,12 +24,13 @@ using VideoCommon::Shader::ShaderIR;
|
|||
|
||||
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
|
||||
std::string out = GetCommonDeclarations();
|
||||
out += R"(
|
||||
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
|
||||
out += fmt::format(R"(
|
||||
layout (std140, binding = {}) uniform vs_config {{
|
||||
float y_direction;
|
||||
};
|
||||
}};
|
||||
|
||||
)";
|
||||
)",
|
||||
EmulationUniformBlockBinding);
|
||||
out += Decompile(device, ir, ShaderType::Vertex, "vertex");
|
||||
if (ir_b) {
|
||||
out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
|
||||
|
@ -44,12 +49,13 @@ void main() {
|
|||
|
||||
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
|
||||
std::string out = GetCommonDeclarations();
|
||||
out += R"(
|
||||
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
|
||||
out += fmt::format(R"(
|
||||
layout (std140, binding = {}) uniform gs_config {{
|
||||
float y_direction;
|
||||
};
|
||||
}};
|
||||
|
||||
)";
|
||||
)",
|
||||
EmulationUniformBlockBinding);
|
||||
out += Decompile(device, ir, ShaderType::Geometry, "geometry");
|
||||
|
||||
out += R"(
|
||||
|
@ -62,7 +68,7 @@ void main() {
|
|||
|
||||
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
|
||||
std::string out = GetCommonDeclarations();
|
||||
out += R"(
|
||||
out += fmt::format(R"(
|
||||
layout (location = 0) out vec4 FragColor0;
|
||||
layout (location = 1) out vec4 FragColor1;
|
||||
layout (location = 2) out vec4 FragColor2;
|
||||
|
@ -72,11 +78,12 @@ layout (location = 5) out vec4 FragColor5;
|
|||
layout (location = 6) out vec4 FragColor6;
|
||||
layout (location = 7) out vec4 FragColor7;
|
||||
|
||||
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
|
||||
layout (std140, binding = {}) uniform fs_config {{
|
||||
float y_direction;
|
||||
};
|
||||
}};
|
||||
|
||||
)";
|
||||
)",
|
||||
EmulationUniformBlockBinding);
|
||||
out += Decompile(device, ir, ShaderType::Fragment, "fragment");
|
||||
|
||||
out += R"(
|
||||
|
|
|
@ -417,14 +417,20 @@ void OpenGLState::ApplyClipControl() {
|
|||
}
|
||||
|
||||
void OpenGLState::ApplyTextures() {
|
||||
if (const auto update = UpdateArray(cur_state.textures, textures)) {
|
||||
glBindTextures(update->first, update->second, textures.data() + update->first);
|
||||
const std::size_t size = std::size(textures);
|
||||
for (std::size_t i = 0; i < size; ++i) {
|
||||
if (UpdateValue(cur_state.textures[i], textures[i])) {
|
||||
glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OpenGLState::ApplySamplers() {
|
||||
if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
|
||||
glBindSamplers(update->first, update->second, samplers.data() + update->first);
|
||||
const std::size_t size = std::size(samplers);
|
||||
for (std::size_t i = 0; i < size; ++i) {
|
||||
if (UpdateValue(cur_state.samplers[i], samplers[i])) {
|
||||
glBindSampler(static_cast<GLuint>(i), samplers[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -96,8 +96,9 @@ public:
|
|||
GLenum operation = GL_COPY;
|
||||
} logic_op;
|
||||
|
||||
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {};
|
||||
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {};
|
||||
static constexpr std::size_t NumSamplers = 32 * 5;
|
||||
std::array<GLuint, NumSamplers> textures = {};
|
||||
std::array<GLuint, NumSamplers> samplers = {};
|
||||
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {};
|
||||
|
||||
struct {
|
||||
|
|
|
@ -3,7 +3,10 @@
|
|||
// Refer to the license.txt file included.
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
|
@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t
|
|||
|
||||
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
|
||||
|
||||
void BindBuffersRangePushBuffer::Setup(GLuint first_) {
|
||||
first = first_;
|
||||
buffer_pointers.clear();
|
||||
offsets.clear();
|
||||
sizes.clear();
|
||||
void BindBuffersRangePushBuffer::Setup() {
|
||||
entries.clear();
|
||||
}
|
||||
|
||||
void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
|
||||
buffer_pointers.push_back(buffer);
|
||||
offsets.push_back(offset);
|
||||
sizes.push_back(size);
|
||||
void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset,
|
||||
GLsizeiptr size) {
|
||||
entries.push_back(Entry{binding, buffer, offset, size});
|
||||
}
|
||||
|
||||
void BindBuffersRangePushBuffer::Bind() {
|
||||
// Ensure sizes are valid.
|
||||
const std::size_t count{buffer_pointers.size()};
|
||||
DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
|
||||
if (count == 0) {
|
||||
return;
|
||||
for (const Entry& entry : entries) {
|
||||
glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size);
|
||||
}
|
||||
|
||||
// Dereference buffers.
|
||||
buffers.resize(count);
|
||||
std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
|
||||
[](const GLuint* pointer) { return *pointer; });
|
||||
|
||||
glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
|
||||
sizes.data());
|
||||
}
|
||||
|
||||
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
|
||||
|
|
|
@ -43,20 +43,22 @@ public:
|
|||
explicit BindBuffersRangePushBuffer(GLenum target);
|
||||
~BindBuffersRangePushBuffer();
|
||||
|
||||
void Setup(GLuint first_);
|
||||
void Setup();
|
||||
|
||||
void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
|
||||
void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size);
|
||||
|
||||
void Bind();
|
||||
|
||||
private:
|
||||
GLenum target{};
|
||||
GLuint first{};
|
||||
std::vector<const GLuint*> buffer_pointers;
|
||||
struct Entry {
|
||||
GLuint binding;
|
||||
const GLuint* buffer;
|
||||
GLintptr offset;
|
||||
GLsizeiptr size;
|
||||
};
|
||||
|
||||
std::vector<GLuint> buffers;
|
||||
std::vector<GLintptr> offsets;
|
||||
std::vector<GLsizeiptr> sizes;
|
||||
GLenum target;
|
||||
std::vector<Entry> entries;
|
||||
};
|
||||
|
||||
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
|
||||
|
|
Loading…
Reference in a new issue