shader: Initial OpenGL implementation
This commit is contained in:
parent
850b08a16c
commit
d621e96d0d
38 changed files with 1427 additions and 705 deletions
|
@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() {
|
|||
return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
|
||||
}
|
||||
|
||||
Value IREmitter::LocalInvocationId() {
|
||||
return Inst(Opcode::LocalInvocationId);
|
||||
}
|
||||
|
||||
U32 IREmitter::LocalInvocationIdX() {
|
||||
return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
|
||||
}
|
||||
|
|
|
@ -95,6 +95,7 @@ public:
|
|||
[[nodiscard]] U32 WorkgroupIdY();
|
||||
[[nodiscard]] U32 WorkgroupIdZ();
|
||||
|
||||
[[nodiscard]] Value LocalInvocationId();
|
||||
[[nodiscard]] U32 LocalInvocationIdX();
|
||||
[[nodiscard]] U32 LocalInvocationIdY();
|
||||
[[nodiscard]] U32 LocalInvocationIdZ();
|
||||
|
|
|
@ -120,6 +120,13 @@ enum class SpecialRegister : u64 {
|
|||
case SpecialRegister::SR_INVOCATION_INFO:
|
||||
// LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed");
|
||||
return ir.Imm32(0x00ff'0000);
|
||||
case SpecialRegister::SR_TID: {
|
||||
const IR::Value tid{ir.LocalInvocationId()};
|
||||
return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
|
||||
IR::U32{ir.CompositeExtract(tid, 1)},
|
||||
ir.Imm32(16), ir.Imm32(8)),
|
||||
IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
|
||||
}
|
||||
case SpecialRegister::SR_TID_X:
|
||||
return ir.LocalInvocationIdX();
|
||||
case SpecialRegister::SR_TID_Y:
|
||||
|
|
|
@ -67,10 +67,14 @@ add_library(video_core STATIC
|
|||
renderer_base.h
|
||||
renderer_opengl/gl_buffer_cache.cpp
|
||||
renderer_opengl/gl_buffer_cache.h
|
||||
renderer_opengl/gl_compute_program.cpp
|
||||
renderer_opengl/gl_compute_program.h
|
||||
renderer_opengl/gl_device.cpp
|
||||
renderer_opengl/gl_device.h
|
||||
renderer_opengl/gl_fence_manager.cpp
|
||||
renderer_opengl/gl_fence_manager.h
|
||||
renderer_opengl/gl_graphics_program.cpp
|
||||
renderer_opengl/gl_graphics_program.h
|
||||
renderer_opengl/gl_rasterizer.cpp
|
||||
renderer_opengl/gl_rasterizer.h
|
||||
renderer_opengl/gl_resource_manager.cpp
|
||||
|
|
|
@ -70,8 +70,8 @@ class BufferCache {
|
|||
P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
|
||||
static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX;
|
||||
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
|
||||
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
|
||||
|
@ -154,7 +154,7 @@ public:
|
|||
void UnbindGraphicsTextureBuffers(size_t stage);
|
||||
|
||||
void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
|
||||
PixelFormat format, bool is_written);
|
||||
PixelFormat format, bool is_written, bool is_image);
|
||||
|
||||
void UnbindComputeStorageBuffers();
|
||||
|
||||
|
@ -164,7 +164,7 @@ public:
|
|||
void UnbindComputeTextureBuffers();
|
||||
|
||||
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
|
||||
bool is_written);
|
||||
bool is_written, bool is_image);
|
||||
|
||||
void FlushCachedWrites();
|
||||
|
||||
|
@ -197,6 +197,7 @@ public:
|
|||
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
||||
|
||||
std::mutex mutex;
|
||||
Runtime& runtime;
|
||||
|
||||
private:
|
||||
template <typename Func>
|
||||
|
@ -366,7 +367,6 @@ private:
|
|||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Runtime& runtime;
|
||||
|
||||
SlotVector<Buffer> slot_buffers;
|
||||
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
|
||||
|
@ -394,8 +394,10 @@ private:
|
|||
|
||||
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> image_texture_buffers{};
|
||||
u32 enabled_compute_texture_buffers = 0;
|
||||
u32 written_compute_texture_buffers = 0;
|
||||
u32 image_compute_texture_buffers = 0;
|
||||
|
||||
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
|
||||
|
||||
|
@ -431,8 +433,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
|||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
Runtime& runtime_)
|
||||
: rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
|
||||
gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
|
||||
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
|
||||
// Ensure the first slot is used for the null buffer
|
||||
void(slot_buffers.insert(runtime, NullBufferParams{}));
|
||||
deletion_iterator = slot_buffers.end();
|
||||
|
@ -703,13 +705,18 @@ template <class P>
|
|||
void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
|
||||
enabled_texture_buffers[stage] = 0;
|
||||
written_texture_buffers[stage] = 0;
|
||||
image_texture_buffers[stage] = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
|
||||
u32 size, PixelFormat format, bool is_written) {
|
||||
u32 size, PixelFormat format, bool is_written,
|
||||
bool is_image) {
|
||||
enabled_texture_buffers[stage] |= 1U << tbo_index;
|
||||
written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
|
||||
}
|
||||
texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||
}
|
||||
|
||||
|
@ -717,6 +724,7 @@ template <class P>
|
|||
void BufferCache<P>::UnbindComputeStorageBuffers() {
|
||||
enabled_compute_storage_buffers = 0;
|
||||
written_compute_storage_buffers = 0;
|
||||
image_compute_texture_buffers = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -737,13 +745,17 @@ template <class P>
|
|||
void BufferCache<P>::UnbindComputeTextureBuffers() {
|
||||
enabled_compute_texture_buffers = 0;
|
||||
written_compute_texture_buffers = 0;
|
||||
image_compute_texture_buffers = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
|
||||
PixelFormat format, bool is_written) {
|
||||
PixelFormat format, bool is_written, bool is_image) {
|
||||
enabled_compute_texture_buffers |= 1U << tbo_index;
|
||||
written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
|
||||
}
|
||||
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||
}
|
||||
|
||||
|
@ -1057,7 +1069,6 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
|
||||
const TextureBufferBinding& binding = texture_buffers[stage][index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
|
@ -1066,9 +1077,12 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
|||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) {
|
||||
runtime.BindTextureBuffer(binding_index, buffer, offset, size, format);
|
||||
++binding_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((image_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
}
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
}
|
||||
|
@ -1139,7 +1153,6 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostComputeTextureBuffers() {
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
|
||||
const TextureBufferBinding& binding = compute_texture_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
|
@ -1148,9 +1161,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
|
|||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) {
|
||||
runtime.BindTextureBuffer(binding_index, buffer, offset, size, format);
|
||||
++binding_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((image_compute_texture_buffers >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
}
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
}
|
||||
|
@ -1339,11 +1355,10 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
|
|||
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
||||
// Resolve buffer
|
||||
Binding& binding = compute_storage_buffers[index];
|
||||
const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
binding.buffer_id = buffer_id;
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
// Mark as written if needed
|
||||
if (((written_compute_storage_buffers >> index) & 1) != 0) {
|
||||
MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
|
||||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -2,14 +2,18 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <span>
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
struct BindlessSSBO {
|
||||
GLuint64EXT address;
|
||||
GLsizei length;
|
||||
|
@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept {
|
|||
glMakeNamedBufferResidentNV(buffer.handle, access);
|
||||
}
|
||||
|
||||
GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
|
||||
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
|
||||
return offset == view.offset && size == view.size && format == view.format;
|
||||
})};
|
||||
if (it != views.end()) {
|
||||
return it->texture.handle;
|
||||
}
|
||||
OGLTexture texture;
|
||||
texture.Create(GL_TEXTURE_BUFFER);
|
||||
const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
|
||||
glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size);
|
||||
views.push_back({
|
||||
.offset = offset,
|
||||
.size = size,
|
||||
.format = format,
|
||||
.texture = std::move(texture),
|
||||
});
|
||||
return views.back().texture.handle;
|
||||
}
|
||||
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
|
||||
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
|
||||
use_assembly_shaders{device.UseAssemblyShaders()},
|
||||
|
@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
|
|||
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint base_binding = graphics_base_uniform_bindings[stage];
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
|
@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
|
|||
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
|
||||
reinterpret_cast<const GLuint*>(&ssbo));
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
|
||||
const GLuint base_binding = graphics_base_storage_bindings[stage];
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
|
@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
|
|||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
|
||||
PixelFormat format) {
|
||||
*texture_handles++ = buffer.View(offset, size, format);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
|
||||
*image_handles++ = buffer.View(offset, size, format);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -32,6 +32,8 @@ public:
|
|||
|
||||
void MakeResident(GLenum access) noexcept;
|
||||
|
||||
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
|
||||
|
||||
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
|
||||
return address;
|
||||
}
|
||||
|
@ -41,9 +43,17 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
struct BufferView {
|
||||
u32 offset;
|
||||
u32 size;
|
||||
VideoCore::Surface::PixelFormat format;
|
||||
OGLTexture texture;
|
||||
};
|
||||
|
||||
GLuint64EXT address = 0;
|
||||
OGLBuffer buffer;
|
||||
GLenum current_residency_access = GL_NONE;
|
||||
std::vector<BufferView> views;
|
||||
};
|
||||
|
||||
class BufferCacheRuntime {
|
||||
|
@ -75,13 +85,19 @@ public:
|
|||
|
||||
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
|
||||
VideoCore::Surface::PixelFormat format);
|
||||
|
||||
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
|
||||
VideoCore::Surface::PixelFormat format);
|
||||
|
||||
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
|
||||
if (use_assembly_shaders) {
|
||||
const GLuint handle = fast_uniforms[stage][binding_index].handle;
|
||||
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
|
||||
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint base_binding = graphics_base_uniform_bindings[stage];
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
|
||||
fast_uniforms[stage][binding_index].handle, 0,
|
||||
|
@ -103,7 +119,7 @@ public:
|
|||
|
||||
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
|
||||
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint base_binding = graphics_base_uniform_bindings[stage];
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
|
@ -118,6 +134,19 @@ public:
|
|||
return has_fast_buffer_sub_data;
|
||||
}
|
||||
|
||||
void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
|
||||
graphics_base_uniform_bindings = bindings;
|
||||
}
|
||||
|
||||
void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
|
||||
graphics_base_storage_bindings = bindings;
|
||||
}
|
||||
|
||||
void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
|
||||
texture_handles = texture_handles_;
|
||||
image_handles = image_handles_;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::array PABO_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
|
@ -133,6 +162,11 @@ private:
|
|||
|
||||
u32 max_attributes = 0;
|
||||
|
||||
std::array<GLuint, 5> graphics_base_uniform_bindings{};
|
||||
std::array<GLuint, 5> graphics_base_storage_bindings{};
|
||||
GLuint* texture_handles = nullptr;
|
||||
GLuint* image_handles = nullptr;
|
||||
|
||||
std::optional<StreamBuffer> stream_buffer;
|
||||
|
||||
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
|
||||
|
@ -155,8 +189,8 @@ struct BufferCacheParams {
|
|||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
|
||||
static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true;
|
||||
static constexpr bool USE_MEMORY_MAPS = false;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
|
178
src/video_core/renderer_opengl/gl_compute_program.cpp
Normal file
178
src/video_core/renderer_opengl/gl_compute_program.cpp
Normal file
|
@ -0,0 +1,178 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/cityhash.h"
|
||||
#include "video_core/renderer_opengl/gl_compute_program.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Tegra::Texture::TexturePair;
|
||||
using VideoCommon::ImageId;
|
||||
|
||||
constexpr u32 MAX_TEXTURES = 64;
|
||||
constexpr u32 MAX_IMAGES = 16;
|
||||
|
||||
size_t ComputeProgramKey::Hash() const noexcept {
|
||||
return static_cast<size_t>(
|
||||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
|
||||
}
|
||||
|
||||
bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept {
|
||||
return std::memcmp(this, &rhs, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
ProgramManager& program_manager_, OGLProgram program_,
|
||||
const Shader::Info& info_)
|
||||
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
|
||||
kepler_compute{kepler_compute_},
|
||||
program_manager{program_manager_}, program{std::move(program_)}, info{info_} {
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
num_texture_buffers += desc.count;
|
||||
}
|
||||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
num_image_buffers += desc.count;
|
||||
}
|
||||
u32 num_textures = num_texture_buffers;
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
num_textures += desc.count;
|
||||
}
|
||||
ASSERT(num_textures <= MAX_TEXTURES);
|
||||
|
||||
u32 num_images = num_image_buffers;
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
num_images += desc.count;
|
||||
}
|
||||
ASSERT(num_images <= MAX_IMAGES);
|
||||
}
|
||||
|
||||
void ComputeProgram::Configure() {
|
||||
buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
size_t ssbo_index{};
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
ASSERT(desc.count == 1);
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
|
||||
desc.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
|
||||
boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
|
||||
std::array<GLuint, MAX_TEXTURES> samplers;
|
||||
std::array<GLuint, MAX_TEXTURES> textures;
|
||||
std::array<GLuint, MAX_IMAGES> images;
|
||||
GLsizei sampler_binding{};
|
||||
GLsizei texture_binding{};
|
||||
GLsizei image_binding{};
|
||||
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& cbufs{qmd.const_buffer_config};
|
||||
const bool via_header_index{qmd.linked_tsc != 0};
|
||||
const auto read_handle{[&](const auto& desc, u32 index) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
|
||||
const u32 index_offset{index << desc.size_shift};
|
||||
const u32 offset{desc.cbuf_offset + index_offset};
|
||||
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
|
||||
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
|
||||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
|
||||
if (desc.has_secondary) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
|
||||
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
|
||||
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
|
||||
secondary_offset};
|
||||
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
|
||||
return TexturePair(lhs_raw | rhs_raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
}
|
||||
}};
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
samplers[sampler_binding++] = 0;
|
||||
}
|
||||
}
|
||||
std::ranges::for_each(info.image_buffer_descriptors, add_image);
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
||||
samplers[sampler_binding++] = sampler->Handle();
|
||||
}
|
||||
}
|
||||
std::ranges::for_each(info.image_descriptors, add_image);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
buffer_cache.UnbindComputeTextureBuffers();
|
||||
size_t texbuf_index{};
|
||||
const auto add_buffer{[&](const auto& desc) {
|
||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||
for (u32 i = 0; i < desc.count; ++i) {
|
||||
bool is_written{false};
|
||||
if constexpr (is_image) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
|
||||
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
is_written, is_image);
|
||||
++texbuf_index;
|
||||
}
|
||||
}};
|
||||
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
|
||||
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
|
||||
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
|
||||
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
|
||||
texture_binding += num_texture_buffers;
|
||||
image_binding += num_image_buffers;
|
||||
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
||||
textures[texture_binding++] = image_view.Handle(desc.type);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
||||
images[image_binding++] = image_view.Handle(desc.type);
|
||||
}
|
||||
}
|
||||
if (texture_binding != 0) {
|
||||
ASSERT(texture_binding == sampler_binding);
|
||||
glBindTextures(0, texture_binding, textures.data());
|
||||
glBindSamplers(0, sampler_binding, samplers.data());
|
||||
}
|
||||
if (image_binding != 0) {
|
||||
glBindImageTextures(0, image_binding, images.data());
|
||||
}
|
||||
program_manager.BindProgram(program.handle);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
83
src/video_core/renderer_opengl/gl_compute_program.h
Normal file
83
src/video_core/renderer_opengl/gl_compute_program.h
Normal file
|
@ -0,0 +1,83 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
class KeplerCompute;
|
||||
}
|
||||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class ProgramManager;
|
||||
|
||||
struct ComputeProgramKey {
|
||||
u64 unique_hash;
|
||||
u32 shared_memory_size;
|
||||
std::array<u32, 3> workgroup_size;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const ComputeProgramKey&) const noexcept;
|
||||
|
||||
bool operator!=(const ComputeProgramKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<ComputeProgramKey>);
|
||||
static_assert(std::is_trivially_copyable_v<ComputeProgramKey>);
|
||||
static_assert(std::is_trivially_constructible_v<ComputeProgramKey>);
|
||||
|
||||
class ComputeProgram {
|
||||
public:
|
||||
explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
ProgramManager& program_manager_, OGLProgram program_,
|
||||
const Shader::Info& info_);
|
||||
|
||||
void Configure();
|
||||
|
||||
private:
|
||||
TextureCache& texture_cache;
|
||||
BufferCache& buffer_cache;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
ProgramManager& program_manager;
|
||||
|
||||
OGLProgram program;
|
||||
Shader::Info info;
|
||||
|
||||
u32 num_texture_buffers{};
|
||||
u32 num_image_buffers{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<OpenGL::ComputeProgramKey> {
|
||||
size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
|
@ -22,34 +22,11 @@
|
|||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
// One uniform block is reserved for emulation purposes
|
||||
constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
constexpr u32 NumStages = 5;
|
||||
|
||||
constexpr std::array LIMIT_UBOS = {
|
||||
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SSBOS = {
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SAMPLERS = {
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
|
||||
};
|
||||
constexpr std::array LIMIT_IMAGES = {
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
|
@ -82,15 +59,6 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
|
|||
return std::ranges::find(extensions, extension) != extensions.end();
|
||||
}
|
||||
|
||||
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
||||
ASSERT(num >= amount);
|
||||
if (limit) {
|
||||
amount = std::min(amount, GetInteger<u32>(*limit));
|
||||
}
|
||||
num -= amount;
|
||||
return std::exchange(base, base + amount);
|
||||
}
|
||||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||
std::ranges::transform(LIMIT_UBOS, max.begin(),
|
||||
|
@ -98,62 +66,6 @@ std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcep
|
|||
return max;
|
||||
}
|
||||
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
|
||||
|
||||
static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
|
||||
const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
|
||||
const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
|
||||
const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
|
||||
|
||||
u32 num_ubos = total_ubos - ReservedUniformBlocks;
|
||||
u32 num_ssbos = total_ssbos;
|
||||
u32 num_samplers = total_samplers;
|
||||
|
||||
u32 base_ubo = ReservedUniformBlocks;
|
||||
u32 base_ssbo = 0;
|
||||
u32 base_samplers = 0;
|
||||
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
bindings[stage] = {
|
||||
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
|
||||
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
|
||||
Extract(base_samplers, num_samplers, total_samplers / NumStages,
|
||||
LIMIT_SAMPLERS[stage])};
|
||||
}
|
||||
|
||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||
u32 base_images = 0;
|
||||
|
||||
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
|
||||
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
|
||||
// fragment stage, and at least 1 for the rest of the stages.
|
||||
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
|
||||
|
||||
// Reserve at least 4 image bindings on the fragment stage.
|
||||
bindings[4].image =
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
|
||||
|
||||
// This is guaranteed to be at least 1.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||
|
||||
// Reserve the other image bindings.
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
if (stage == 4) {
|
||||
continue;
|
||||
}
|
||||
bindings[stage].image =
|
||||
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
|
||||
}
|
||||
|
||||
// Compute doesn't care about any of this.
|
||||
bindings[5] = {0, 0, 0, 0};
|
||||
|
||||
return bindings;
|
||||
}
|
||||
|
||||
bool IsASTCSupported() {
|
||||
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
|
||||
static constexpr std::array formats = {
|
||||
|
@ -225,7 +137,6 @@ Device::Device() {
|
|||
}
|
||||
|
||||
max_uniform_buffers = BuildMaxUniformBuffers();
|
||||
base_bindings = BuildBaseBindings();
|
||||
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
|
|
|
@ -12,13 +12,6 @@ namespace OpenGL {
|
|||
|
||||
class Device {
|
||||
public:
|
||||
struct BaseBindings {
|
||||
u32 uniform_buffer{};
|
||||
u32 shader_storage_buffer{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
};
|
||||
|
||||
explicit Device();
|
||||
explicit Device(std::nullptr_t);
|
||||
|
||||
|
@ -28,14 +21,6 @@ public:
|
|||
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
|
||||
return base_bindings[stage_index];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
return GetBaseBindings(static_cast<std::size_t>(shader_type));
|
||||
}
|
||||
|
||||
size_t GetUniformBufferAlignment() const {
|
||||
return uniform_buffer_alignment;
|
||||
}
|
||||
|
@ -134,7 +119,6 @@ private:
|
|||
|
||||
std::string vendor_name;
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||
size_t uniform_buffer_alignment{};
|
||||
size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
|
|
296
src/video_core/renderer_opengl/gl_graphics_program.cpp
Normal file
296
src/video_core/renderer_opengl/gl_graphics_program.cpp
Normal file
|
@ -0,0 +1,296 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/cityhash.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_opengl/gl_graphics_program.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Tegra::Texture::TexturePair;
|
||||
using VideoCommon::ImageId;
|
||||
|
||||
constexpr u32 MAX_TEXTURES = 64;
|
||||
constexpr u32 MAX_IMAGES = 8;
|
||||
|
||||
size_t GraphicsProgramKey::Hash() const noexcept {
|
||||
return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
|
||||
}
|
||||
|
||||
bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept {
|
||||
return std::memcmp(this, &rhs, Size()) == 0;
|
||||
}
|
||||
|
||||
GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
||||
OGLProgram program_,
|
||||
const std::array<const Shader::Info*, 5>& infos)
|
||||
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
|
||||
gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
|
||||
state_tracker{state_tracker_}, program{std::move(program_)} {
|
||||
std::ranges::transform(infos, stage_infos.begin(),
|
||||
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
|
||||
|
||||
u32 num_textures{};
|
||||
u32 num_images{};
|
||||
for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) {
|
||||
const auto& info{stage_infos[stage]};
|
||||
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
|
||||
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
|
||||
for (const auto& desc : info.constant_buffer_descriptors) {
|
||||
base_uniform_bindings[stage + 1] += desc.count;
|
||||
}
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
base_storage_bindings[stage + 1] += desc.count;
|
||||
}
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
num_texture_buffers[stage] += desc.count;
|
||||
num_textures += desc.count;
|
||||
}
|
||||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
num_image_buffers[stage] += desc.count;
|
||||
num_images += desc.count;
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
num_textures += desc.count;
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
num_images += desc.count;
|
||||
}
|
||||
}
|
||||
ASSERT(num_textures <= MAX_TEXTURES);
|
||||
ASSERT(num_images <= MAX_IMAGES);
|
||||
}
|
||||
|
||||
struct Spec {
|
||||
static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
|
||||
static constexpr bool has_storage_buffers = true;
|
||||
static constexpr bool has_texture_buffers = true;
|
||||
static constexpr bool has_image_buffers = true;
|
||||
static constexpr bool has_images = true;
|
||||
};
|
||||
|
||||
void GraphicsProgram::Configure(bool is_indexed) {
|
||||
std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
|
||||
std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
|
||||
std::array<GLuint, MAX_TEXTURES> samplers;
|
||||
size_t image_view_index{};
|
||||
GLsizei sampler_binding{};
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
|
||||
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
|
||||
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
|
||||
|
||||
const auto& regs{maxwell3d.regs};
|
||||
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
|
||||
const auto config_stage{[&](size_t stage) {
|
||||
const Shader::Info& info{stage_infos[stage]};
|
||||
buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
|
||||
buffer_cache.UnbindGraphicsStorageBuffers(stage);
|
||||
if constexpr (Spec::has_storage_buffers) {
|
||||
size_t ssbo_index{};
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
ASSERT(desc.count == 1);
|
||||
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
|
||||
desc.cbuf_offset, desc.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
}
|
||||
const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
|
||||
const auto read_handle{[&](const auto& desc, u32 index) {
|
||||
ASSERT(cbufs[desc.cbuf_index].enabled);
|
||||
const u32 index_offset{index << desc.size_shift};
|
||||
const u32 offset{desc.cbuf_offset + index_offset};
|
||||
const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
|
||||
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
|
||||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
|
||||
if (desc.has_secondary) {
|
||||
ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
|
||||
const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
|
||||
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
|
||||
second_offset};
|
||||
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
|
||||
const u32 raw{lhs_raw | rhs_raw};
|
||||
return TexturePair(raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_view_index++] = handle.first;
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::has_texture_buffers) {
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_view_index++] = handle.first;
|
||||
samplers[sampler_binding++] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if constexpr (Spec::has_image_buffers) {
|
||||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
add_image(desc);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_view_index++] = handle.first;
|
||||
|
||||
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
|
||||
samplers[sampler_binding++] = sampler->Handle();
|
||||
}
|
||||
}
|
||||
if constexpr (Spec::has_images) {
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
add_image(desc);
|
||||
}
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::enabled_stages[0]) {
|
||||
config_stage(0);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[1]) {
|
||||
config_stage(1);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[2]) {
|
||||
config_stage(2);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[3]) {
|
||||
config_stage(3);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[4]) {
|
||||
config_stage(4);
|
||||
}
|
||||
const std::span indices_span(image_view_indices.data(), image_view_index);
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
ImageId* texture_buffer_index{image_view_ids.data()};
|
||||
const auto bind_stage_info{[&](size_t stage) {
|
||||
size_t index{};
|
||||
const auto add_buffer{[&](const auto& desc) {
|
||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||
for (u32 i = 0; i < desc.count; ++i) {
|
||||
bool is_written{false};
|
||||
if constexpr (is_image) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
|
||||
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
is_written, is_image);
|
||||
++index;
|
||||
++texture_buffer_index;
|
||||
}
|
||||
}};
|
||||
const Shader::Info& info{stage_infos[stage]};
|
||||
buffer_cache.UnbindGraphicsTextureBuffers(stage);
|
||||
|
||||
if constexpr (Spec::has_texture_buffers) {
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
add_buffer(desc);
|
||||
}
|
||||
}
|
||||
if constexpr (Spec::has_image_buffers) {
|
||||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
add_buffer(desc);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
texture_buffer_index += desc.count;
|
||||
}
|
||||
if constexpr (Spec::has_images) {
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
texture_buffer_index += desc.count;
|
||||
}
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::enabled_stages[0]) {
|
||||
bind_stage_info(0);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[1]) {
|
||||
bind_stage_info(1);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[2]) {
|
||||
bind_stage_info(2);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[3]) {
|
||||
bind_stage_info(3);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[4]) {
|
||||
bind_stage_info(4);
|
||||
}
|
||||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
const ImageId* views_it{image_view_ids.data()};
|
||||
GLsizei texture_binding = 0;
|
||||
GLsizei image_binding = 0;
|
||||
std::array<GLuint, MAX_TEXTURES> textures;
|
||||
std::array<GLuint, MAX_IMAGES> images;
|
||||
const auto prepare_stage{[&](size_t stage) {
|
||||
buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
|
||||
buffer_cache.BindHostStageBuffers(stage);
|
||||
|
||||
texture_binding += num_texture_buffers[stage];
|
||||
image_binding += num_image_buffers[stage];
|
||||
|
||||
const auto& info{stage_infos[stage]};
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
||||
textures[texture_binding++] = image_view.Handle(desc.type);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
|
||||
images[image_binding++] = image_view.Handle(desc.type);
|
||||
}
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::enabled_stages[0]) {
|
||||
prepare_stage(0);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[1]) {
|
||||
prepare_stage(1);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[2]) {
|
||||
prepare_stage(2);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[3]) {
|
||||
prepare_stage(3);
|
||||
}
|
||||
if constexpr (Spec::enabled_stages[4]) {
|
||||
prepare_stage(4);
|
||||
}
|
||||
if (texture_binding != 0) {
|
||||
ASSERT(texture_binding == sampler_binding);
|
||||
glBindTextures(0, texture_binding, textures.data());
|
||||
glBindSamplers(0, sampler_binding, samplers.data());
|
||||
}
|
||||
if (image_binding != 0) {
|
||||
glBindImageTextures(0, image_binding, images.data());
|
||||
}
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
program_manager.BindProgram(program.handle);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
105
src/video_core/renderer_opengl/gl_graphics_program.h
Normal file
105
src/video_core/renderer_opengl/gl_graphics_program.h
Normal file
|
@ -0,0 +1,105 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class ProgramManager;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct GraphicsProgramKey {
|
||||
struct TransformFeedbackState {
|
||||
struct Layout {
|
||||
u32 stream;
|
||||
u32 varying_count;
|
||||
u32 stride;
|
||||
};
|
||||
std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
|
||||
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
|
||||
};
|
||||
|
||||
std::array<u64, 6> unique_hashes;
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> xfb_enabled;
|
||||
BitField<1, 1, u32> early_z;
|
||||
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
|
||||
BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
|
||||
BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
|
||||
BitField<10, 1, u32> tessellation_clockwise;
|
||||
};
|
||||
std::array<u32, 3> padding;
|
||||
TransformFeedbackState xfb_state;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const GraphicsProgramKey&) const noexcept;
|
||||
|
||||
bool operator!=(const GraphicsProgramKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t Size() const noexcept {
|
||||
if (xfb_enabled != 0) {
|
||||
return sizeof(GraphicsProgramKey);
|
||||
} else {
|
||||
return offsetof(GraphicsProgramKey, padding);
|
||||
}
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
|
||||
static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
|
||||
|
||||
class GraphicsProgram {
|
||||
public:
|
||||
explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
||||
OGLProgram program_, const std::array<const Shader::Info*, 5>& infos);
|
||||
|
||||
void Configure(bool is_indexed);
|
||||
|
||||
private:
|
||||
TextureCache& texture_cache;
|
||||
BufferCache& buffer_cache;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLProgram program;
|
||||
std::array<Shader::Info, 5> stage_infos{};
|
||||
std::array<u32, 5> base_uniform_bindings{};
|
||||
std::array<u32, 5> base_storage_bindings{};
|
||||
std::array<u32, 5> num_texture_buffers{};
|
||||
std::array<u32, 5> num_image_buffers{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<OpenGL::GraphicsProgramKey> {
|
||||
size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
|
@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
|
||||
shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
|
||||
buffer_cache, program_manager, state_tracker),
|
||||
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
|
||||
|
||||
|
@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
|
||||
SyncState();
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()};
|
||||
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
program_manager.BindGraphicsPipeline();
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
program->Configure(is_indexed);
|
||||
|
||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
||||
BeginTransformFeedback(primitive_mode);
|
||||
|
@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
num_instances, base_instance);
|
||||
}
|
||||
}
|
||||
|
||||
EndTransformFeedback();
|
||||
|
||||
++num_queued_commands;
|
||||
|
@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute() {
|
||||
UNREACHABLE_MSG("Not implemented");
|
||||
ComputeProgram* const program{shader_cache.CurrentComputeProgram()};
|
||||
if (!program) {
|
||||
return;
|
||||
}
|
||||
program->Configure();
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
|
||||
++num_queued_commands;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
||||
|
@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
|
||||
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
|
||||
|
||||
screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
|
||||
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
|
||||
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -16,6 +16,11 @@
|
|||
#include "common/scope_exit.h"
|
||||
#include "core/core.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/frontend/ir/program.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/frontend/maxwell/program.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
|
@ -25,17 +30,281 @@
|
|||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/shader_environment.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
// FIXME: Move this somewhere else
|
||||
const Shader::Profile profile{
|
||||
.supported_spirv = 0x00010000,
|
||||
|
||||
.unified_descriptor_binding = false,
|
||||
.support_descriptor_aliasing = false,
|
||||
.support_int8 = false,
|
||||
.support_int16 = false,
|
||||
.support_vertex_instance_id = true,
|
||||
.support_float_controls = false,
|
||||
.support_separate_denorm_behavior = false,
|
||||
.support_separate_rounding_mode = false,
|
||||
.support_fp16_denorm_preserve = false,
|
||||
.support_fp32_denorm_preserve = false,
|
||||
.support_fp16_denorm_flush = false,
|
||||
.support_fp32_denorm_flush = false,
|
||||
.support_fp16_signed_zero_nan_preserve = false,
|
||||
.support_fp32_signed_zero_nan_preserve = false,
|
||||
.support_fp64_signed_zero_nan_preserve = false,
|
||||
.support_explicit_workgroup_layout = false,
|
||||
.support_vote = true,
|
||||
.support_viewport_index_layer_non_geometry = true,
|
||||
.support_viewport_mask = true,
|
||||
.support_typeless_image_loads = true,
|
||||
.support_demote_to_helper_invocation = false,
|
||||
.warp_size_potentially_larger_than_guest = true,
|
||||
.support_int64_atomics = false,
|
||||
.lower_left_origin_mode = true,
|
||||
|
||||
.has_broken_spirv_clamp = true,
|
||||
.has_broken_unsigned_image_offsets = true,
|
||||
.has_broken_signed_operations = true,
|
||||
.ignore_nan_fp_comparisons = true,
|
||||
|
||||
.generic_input_types = {},
|
||||
.convert_depth_mode = false,
|
||||
.force_early_z = false,
|
||||
|
||||
.tess_primitive = {},
|
||||
.tess_spacing = {},
|
||||
.tess_clockwise = false,
|
||||
|
||||
.input_topology = Shader::InputTopology::Triangles,
|
||||
|
||||
.fixed_state_point_size = std::nullopt,
|
||||
|
||||
.alpha_test_func = Shader::CompareFunction::Always,
|
||||
.alpha_test_reference = 0.0f,
|
||||
|
||||
.y_negate = false,
|
||||
|
||||
.xfb_varyings = {},
|
||||
};
|
||||
|
||||
using Shader::Backend::SPIRV::EmitSPIRV;
|
||||
using Shader::Maxwell::TranslateProgram;
|
||||
using VideoCommon::ComputeEnvironment;
|
||||
using VideoCommon::GraphicsEnvironment;
|
||||
|
||||
template <typename Container>
|
||||
auto MakeSpan(Container& container) {
|
||||
return std::span(container.data(), container.size());
|
||||
}
|
||||
|
||||
void AddShader(GLenum stage, GLuint program, std::span<const u32> code) {
|
||||
OGLShader shader;
|
||||
shader.handle = glCreateShader(stage);
|
||||
|
||||
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
|
||||
static_cast<GLsizei>(code.size_bytes()));
|
||||
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
|
||||
glAttachShader(program, shader.handle);
|
||||
if (!Settings::values.renderer_debug) {
|
||||
return;
|
||||
}
|
||||
GLint shader_status{};
|
||||
glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status);
|
||||
if (shader_status == GL_FALSE) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to build shader");
|
||||
}
|
||||
GLint log_length{};
|
||||
glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length);
|
||||
if (log_length == 0) {
|
||||
return;
|
||||
}
|
||||
std::string log(log_length, 0);
|
||||
glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data());
|
||||
if (shader_status == GL_FALSE) {
|
||||
LOG_ERROR(Render_OpenGL, "{}", log);
|
||||
} else {
|
||||
LOG_WARNING(Render_OpenGL, "{}", log);
|
||||
}
|
||||
}
|
||||
|
||||
void LinkProgram(GLuint program) {
|
||||
glLinkProgram(program);
|
||||
if (!Settings::values.renderer_debug) {
|
||||
return;
|
||||
}
|
||||
GLint link_status{};
|
||||
glGetProgramiv(program, GL_LINK_STATUS, &link_status);
|
||||
|
||||
GLint log_length{};
|
||||
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
|
||||
if (log_length == 0) {
|
||||
return;
|
||||
}
|
||||
std::string log(log_length, 0);
|
||||
glGetProgramInfoLog(program, log_length, nullptr, log.data());
|
||||
if (link_status == GL_FALSE) {
|
||||
LOG_ERROR(Render_OpenGL, "{}", log);
|
||||
} else {
|
||||
LOG_WARNING(Render_OpenGL, "{}", log);
|
||||
}
|
||||
}
|
||||
|
||||
GLenum Stage(size_t stage_index) {
|
||||
switch (stage_index) {
|
||||
case 0:
|
||||
return GL_VERTEX_SHADER;
|
||||
case 1:
|
||||
return GL_TESS_CONTROL_SHADER;
|
||||
case 2:
|
||||
return GL_TESS_EVALUATION_SHADER;
|
||||
case 3:
|
||||
return GL_GEOMETRY_SHADER;
|
||||
case 4:
|
||||
return GL_FRAGMENT_SHADER;
|
||||
}
|
||||
UNREACHABLE_MSG("{}", stage_index);
|
||||
return GL_NONE;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
|
||||
Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_)
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_)
|
||||
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
|
||||
emu_window{emu_window_}, gpu{gpu_}, device{device_} {}
|
||||
emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
|
||||
buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{
|
||||
state_tracker_} {}
|
||||
|
||||
ShaderCache::~ShaderCache() = default;
|
||||
|
||||
GraphicsProgram* ShaderCache::CurrentGraphicsProgram() {
|
||||
if (!RefreshStages(graphics_key.unique_hashes)) {
|
||||
return nullptr;
|
||||
}
|
||||
const auto& regs{maxwell3d.regs};
|
||||
graphics_key.raw = 0;
|
||||
graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
|
||||
graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
|
||||
? regs.draw.topology.Value()
|
||||
: Maxwell::PrimitiveTopology{});
|
||||
graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
|
||||
graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
|
||||
graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
|
||||
|
||||
const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
|
||||
auto& program{pair->second};
|
||||
if (is_new) {
|
||||
program = CreateGraphicsProgram();
|
||||
}
|
||||
return program.get();
|
||||
}
|
||||
|
||||
ComputeProgram* ShaderCache::CurrentComputeProgram() {
|
||||
const VideoCommon::ShaderInfo* const shader{ComputeShader()};
|
||||
if (!shader) {
|
||||
return nullptr;
|
||||
}
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const ComputeProgramKey key{
|
||||
.unique_hash = shader->unique_hash,
|
||||
.shared_memory_size = qmd.shared_alloc,
|
||||
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
|
||||
};
|
||||
const auto [pair, is_new]{compute_cache.try_emplace(key)};
|
||||
auto& pipeline{pair->second};
|
||||
if (!is_new) {
|
||||
return pipeline.get();
|
||||
}
|
||||
pipeline = CreateComputeProgram(key, shader);
|
||||
return pipeline.get();
|
||||
}
|
||||
|
||||
std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram() {
|
||||
GraphicsEnvironments environments;
|
||||
GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
|
||||
|
||||
main_pools.ReleaseContents();
|
||||
return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true);
|
||||
}
|
||||
|
||||
std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram(
|
||||
ShaderPools& pools, const GraphicsProgramKey& key, std::span<Shader::Environment* const> envs,
|
||||
bool build_in_parallel) {
|
||||
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
|
||||
size_t env_index{0};
|
||||
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (key.unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
Shader::Environment& env{*envs[env_index]};
|
||||
++env_index;
|
||||
|
||||
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
||||
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
|
||||
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
|
||||
}
|
||||
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
|
||||
|
||||
OGLProgram gl_program;
|
||||
gl_program.handle = glCreateProgram();
|
||||
|
||||
Shader::Backend::SPIRV::Bindings binding;
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (key.unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
UNIMPLEMENTED_IF(index == 0);
|
||||
|
||||
Shader::IR::Program& program{programs[index]};
|
||||
const size_t stage_index{index - 1};
|
||||
infos[stage_index] = &program.info;
|
||||
|
||||
const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
|
||||
FILE* file = fopen("D:\\shader.spv", "wb");
|
||||
fwrite(code.data(), 4, code.size(), file);
|
||||
fclose(file);
|
||||
AddShader(Stage(stage_index), gl_program.handle, code);
|
||||
}
|
||||
LinkProgram(gl_program.handle);
|
||||
|
||||
return std::make_unique<GraphicsProgram>(texture_cache, buffer_cache, gpu_memory, maxwell3d,
|
||||
program_manager, state_tracker, std::move(gl_program),
|
||||
infos);
|
||||
}
|
||||
|
||||
std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(
|
||||
const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) {
|
||||
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
|
||||
env.SetCachedSize(shader->size_bytes);
|
||||
|
||||
main_pools.ReleaseContents();
|
||||
return CreateComputeProgram(main_pools, key, env, true);
|
||||
}
|
||||
|
||||
std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(ShaderPools& pools,
|
||||
const ComputeProgramKey& key,
|
||||
Shader::Environment& env,
|
||||
bool build_in_parallel) {
|
||||
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
|
||||
|
||||
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
||||
Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
|
||||
Shader::Backend::SPIRV::Bindings binding;
|
||||
const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
|
||||
OGLProgram gl_program;
|
||||
gl_program.handle = glCreateProgram();
|
||||
AddShader(GL_COMPUTE_SHADER, gl_program.handle, code);
|
||||
LinkProgram(gl_program.handle);
|
||||
return std::make_unique<ComputeProgram>(texture_cache, buffer_cache, gpu_memory, kepler_compute,
|
||||
program_manager, std::move(gl_program), program.info);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -5,20 +5,18 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_compute_program.h"
|
||||
#include "video_core/renderer_opengl/gl_graphics_program.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
@ -32,64 +30,62 @@ class EmuWindow;
|
|||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class ProgramManager;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct GraphicsProgramKey {
|
||||
struct TransformFeedbackState {
|
||||
struct Layout {
|
||||
u32 stream;
|
||||
u32 varying_count;
|
||||
u32 stride;
|
||||
};
|
||||
std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
|
||||
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
|
||||
};
|
||||
|
||||
std::array<u64, 6> unique_hashes;
|
||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> xfb_enabled;
|
||||
BitField<1, 1, u32> early_z;
|
||||
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
|
||||
BitField<6, 2, u32> tessellation_primitive;
|
||||
BitField<8, 2, u32> tessellation_spacing;
|
||||
BitField<10, 1, u32> tessellation_clockwise;
|
||||
};
|
||||
u32 padding;
|
||||
TransformFeedbackState xfb_state;
|
||||
|
||||
[[nodiscard]] size_t Size() const noexcept {
|
||||
if (xfb_enabled != 0) {
|
||||
return sizeof(GraphicsProgramKey);
|
||||
} else {
|
||||
return offsetof(GraphicsProgramKey, padding);
|
||||
}
|
||||
struct ShaderPools {
|
||||
void ReleaseContents() {
|
||||
flow_block.ReleaseContents();
|
||||
block.ReleaseContents();
|
||||
inst.ReleaseContents();
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
|
||||
static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
|
||||
|
||||
class GraphicsProgram {
|
||||
public:
|
||||
private:
|
||||
Shader::ObjectPool<Shader::IR::Inst> inst;
|
||||
Shader::ObjectPool<Shader::IR::Block> block;
|
||||
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
|
||||
};
|
||||
|
||||
class ShaderCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
|
||||
Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_);
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_);
|
||||
~ShaderCache();
|
||||
|
||||
[[nodiscard]] GraphicsProgram* CurrentGraphicsProgram();
|
||||
|
||||
[[nodiscard]] ComputeProgram* CurrentComputeProgram();
|
||||
|
||||
private:
|
||||
std::unique_ptr<GraphicsProgram> CreateGraphicsProgram();
|
||||
|
||||
std::unique_ptr<GraphicsProgram> CreateGraphicsProgram(
|
||||
ShaderPools& pools, const GraphicsProgramKey& key,
|
||||
std::span<Shader::Environment* const> envs, bool build_in_parallel);
|
||||
|
||||
std::unique_ptr<ComputeProgram> CreateComputeProgram(const ComputeProgramKey& key,
|
||||
const VideoCommon::ShaderInfo* shader);
|
||||
|
||||
std::unique_ptr<ComputeProgram> CreateComputeProgram(ShaderPools& pools,
|
||||
const ComputeProgramKey& key,
|
||||
Shader::Environment& env,
|
||||
bool build_in_parallel);
|
||||
|
||||
Core::Frontend::EmuWindow& emu_window;
|
||||
Tegra::GPU& gpu;
|
||||
const Device& device;
|
||||
TextureCache& texture_cache;
|
||||
BufferCache& buffer_cache;
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
GraphicsProgramKey graphics_key{};
|
||||
|
||||
ShaderPools main_pools;
|
||||
std::unordered_map<GraphicsProgramKey, std::unique_ptr<GraphicsProgram>> graphics_cache;
|
||||
std::unordered_map<ComputeProgramKey, std::unique_ptr<ComputeProgram>> compute_cache;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -1,149 +1,3 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
|
||||
if (current == old) {
|
||||
return;
|
||||
}
|
||||
if (current == 0) {
|
||||
if (enabled) {
|
||||
enabled = false;
|
||||
glDisable(stage);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!enabled) {
|
||||
enabled = true;
|
||||
glEnable(stage);
|
||||
}
|
||||
glBindProgramARB(stage, current);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ProgramManager::ProgramManager(const Device& device)
|
||||
: use_assembly_programs{device.UseAssemblyShaders()} {
|
||||
if (use_assembly_programs) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
} else {
|
||||
graphics_pipeline.Create();
|
||||
glBindProgramPipeline(graphics_pipeline.handle);
|
||||
}
|
||||
}
|
||||
|
||||
ProgramManager::~ProgramManager() = default;
|
||||
|
||||
void ProgramManager::BindCompute(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
|
||||
} else {
|
||||
is_graphics_bound = false;
|
||||
glUseProgram(program);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindGraphicsPipeline() {
|
||||
if (!use_assembly_programs) {
|
||||
UpdateSourcePrograms();
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindHostPipeline(GLuint pipeline) {
|
||||
if (use_assembly_programs) {
|
||||
if (geometry_enabled) {
|
||||
geometry_enabled = false;
|
||||
old_state.geometry = 0;
|
||||
glDisable(GL_GEOMETRY_PROGRAM_NV);
|
||||
}
|
||||
} else {
|
||||
if (!is_graphics_bound) {
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
glBindProgramPipeline(pipeline);
|
||||
}
|
||||
|
||||
void ProgramManager::RestoreGuestPipeline() {
|
||||
if (use_assembly_programs) {
|
||||
glBindProgramPipeline(0);
|
||||
} else {
|
||||
glBindProgramPipeline(graphics_pipeline.handle);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindHostCompute(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
glDisable(GL_COMPUTE_PROGRAM_NV);
|
||||
}
|
||||
glUseProgram(program);
|
||||
is_graphics_bound = false;
|
||||
}
|
||||
|
||||
void ProgramManager::RestoreGuestCompute() {
|
||||
if (use_assembly_programs) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::UseVertexShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
|
||||
}
|
||||
current_state.vertex = program;
|
||||
}
|
||||
|
||||
void ProgramManager::UseGeometryShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
|
||||
}
|
||||
current_state.geometry = program;
|
||||
}
|
||||
|
||||
void ProgramManager::UseFragmentShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
|
||||
}
|
||||
current_state.fragment = program;
|
||||
}
|
||||
|
||||
void ProgramManager::UpdateSourcePrograms() {
|
||||
if (!is_graphics_bound) {
|
||||
is_graphics_bound = true;
|
||||
glUseProgram(0);
|
||||
}
|
||||
|
||||
const GLuint handle = graphics_pipeline.handle;
|
||||
const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
|
||||
if (current == old) {
|
||||
return;
|
||||
}
|
||||
glUseProgramStages(handle, stage, current);
|
||||
};
|
||||
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
|
||||
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
|
||||
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
|
||||
|
||||
old_state = current_state;
|
||||
}
|
||||
|
||||
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
|
||||
const auto& regs = maxwell.regs;
|
||||
|
||||
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
|
||||
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -4,79 +4,24 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
||||
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
|
||||
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
|
||||
/// Not following that rule will cause problems on some AMD drivers.
|
||||
struct alignas(16) MaxwellUniformData {
|
||||
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
|
||||
|
||||
GLfloat y_direction;
|
||||
};
|
||||
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
|
||||
static_assert(sizeof(MaxwellUniformData) < 16384,
|
||||
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
class ProgramManager {
|
||||
public:
|
||||
explicit ProgramManager(const Device& device);
|
||||
~ProgramManager();
|
||||
void BindProgram(GLuint program) {
|
||||
if (bound_program == program) {
|
||||
return;
|
||||
}
|
||||
bound_program = program;
|
||||
glUseProgram(program);
|
||||
}
|
||||
|
||||
/// Binds a compute program
|
||||
void BindCompute(GLuint program);
|
||||
|
||||
/// Updates bound programs.
|
||||
void BindGraphicsPipeline();
|
||||
|
||||
/// Binds an OpenGL pipeline object unsynchronized with the guest state.
|
||||
void BindHostPipeline(GLuint pipeline);
|
||||
|
||||
/// Rewinds BindHostPipeline state changes.
|
||||
void RestoreGuestPipeline();
|
||||
|
||||
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
|
||||
void BindHostCompute(GLuint program);
|
||||
|
||||
/// Rewinds BindHostCompute state changes.
|
||||
void RestoreGuestCompute();
|
||||
|
||||
void UseVertexShader(GLuint program);
|
||||
void UseGeometryShader(GLuint program);
|
||||
void UseFragmentShader(GLuint program);
|
||||
void RestoreGuestCompute() {}
|
||||
|
||||
private:
|
||||
struct PipelineState {
|
||||
GLuint vertex = 0;
|
||||
GLuint geometry = 0;
|
||||
GLuint fragment = 0;
|
||||
};
|
||||
|
||||
/// Update GLSL programs.
|
||||
void UpdateSourcePrograms();
|
||||
|
||||
OGLPipeline graphics_pipeline;
|
||||
|
||||
PipelineState current_state;
|
||||
PipelineState old_state;
|
||||
|
||||
bool use_assembly_programs = false;
|
||||
|
||||
bool is_graphics_bound = true;
|
||||
|
||||
bool vertex_enabled = false;
|
||||
bool geometry_enabled = false;
|
||||
bool fragment_enabled = false;
|
||||
GLuint bound_program = 0;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -24,9 +24,7 @@
|
|||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
using Tegra::Texture::SwizzleSource;
|
||||
using Tegra::Texture::TextureMipmapFilter;
|
||||
using Tegra::Texture::TextureType;
|
||||
|
@ -59,107 +57,6 @@ struct CopyRegion {
|
|||
GLsizei depth;
|
||||
};
|
||||
|
||||
struct FormatTuple {
|
||||
GLenum internal_format;
|
||||
GLenum format = GL_NONE;
|
||||
GLenum type = GL_NONE;
|
||||
};
|
||||
|
||||
constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
|
||||
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
|
||||
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
|
||||
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
|
||||
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
|
||||
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
|
||||
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
|
||||
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
|
||||
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
|
||||
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
|
||||
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
|
||||
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
|
||||
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
|
||||
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
|
||||
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
|
||||
{GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
|
||||
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
|
||||
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
|
||||
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
|
||||
{GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
|
||||
{GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
|
||||
{GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
|
||||
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
|
||||
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
|
||||
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
|
||||
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
|
||||
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
|
||||
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
|
||||
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
|
||||
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
|
||||
{GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
|
||||
{GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
|
||||
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
|
||||
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
|
||||
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
|
||||
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
|
||||
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
|
||||
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
|
||||
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
|
||||
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
|
||||
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
|
||||
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
|
||||
{GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
|
||||
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
|
||||
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
|
||||
{GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
|
||||
{GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
|
||||
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
|
||||
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
|
||||
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
|
||||
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
|
||||
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
|
||||
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
|
||||
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
|
||||
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
|
||||
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
|
||||
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
|
||||
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
|
||||
}};
|
||||
|
||||
constexpr std::array ACCELERATED_FORMATS{
|
||||
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
|
||||
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
|
||||
|
@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
|
|||
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
|
||||
};
|
||||
|
||||
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
|
||||
ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
|
||||
return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
|
||||
}
|
||||
|
||||
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
|
||||
switch (info.type) {
|
||||
case ImageType::e1D:
|
||||
|
@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
|
|||
return GL_NONE;
|
||||
}
|
||||
|
||||
GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
|
||||
GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
|
||||
const bool is_multisampled = num_samples > 1;
|
||||
switch (type) {
|
||||
case ImageViewType::e1D:
|
||||
case Shader::TextureType::Color1D:
|
||||
return GL_TEXTURE_1D;
|
||||
case ImageViewType::e2D:
|
||||
case Shader::TextureType::Color2D:
|
||||
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
|
||||
case ImageViewType::Cube:
|
||||
case Shader::TextureType::ColorCube:
|
||||
return GL_TEXTURE_CUBE_MAP;
|
||||
case ImageViewType::e3D:
|
||||
case Shader::TextureType::Color3D:
|
||||
return GL_TEXTURE_3D;
|
||||
case ImageViewType::e1DArray:
|
||||
case Shader::TextureType::ColorArray1D:
|
||||
return GL_TEXTURE_1D_ARRAY;
|
||||
case ImageViewType::e2DArray:
|
||||
case Shader::TextureType::ColorArray2D:
|
||||
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
|
||||
case ImageViewType::CubeArray:
|
||||
case Shader::TextureType::ColorArrayCube:
|
||||
return GL_TEXTURE_CUBE_MAP_ARRAY;
|
||||
case ImageViewType::Rect:
|
||||
return GL_TEXTURE_RECTANGLE;
|
||||
case ImageViewType::Buffer:
|
||||
case Shader::TextureType::Buffer:
|
||||
return GL_TEXTURE_BUFFER;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid image view type={}", type);
|
||||
|
@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
|
|||
default:
|
||||
return false;
|
||||
}
|
||||
const GLenum internal_format = GetFormatTuple(info.format).internal_format;
|
||||
const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
|
||||
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
|
||||
if (format_info.is_compressed) {
|
||||
return false;
|
||||
|
@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
|
|||
|
||||
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
||||
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
|
||||
const GLuint texture = image_view->DefaultHandle();
|
||||
glNamedFramebufferTexture(fbo, attachment, texture, 0);
|
||||
glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
|
||||
return;
|
||||
}
|
||||
const GLuint texture = image_view->Handle(ImageViewType::e3D);
|
||||
const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
|
||||
if (image_view->range.extent.layers > 1) {
|
||||
// TODO: OpenGL doesn't support rendering to a fixed number of slices
|
||||
glNamedFramebufferTexture(fbo, attachment, texture, 0);
|
||||
|
@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
|
|||
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
|
||||
for (size_t i = 0; i < TARGETS.size(); ++i) {
|
||||
const GLenum target = TARGETS[i];
|
||||
for (const FormatTuple& tuple : FORMAT_TABLE) {
|
||||
for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
|
||||
const GLenum format = tuple.internal_format;
|
||||
GLint compat_class;
|
||||
GLint compat_type;
|
||||
|
@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
|
|||
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
|
||||
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
|
||||
null_image_3d.Create(GL_TEXTURE_3D);
|
||||
null_image_rect.Create(GL_TEXTURE_RECTANGLE);
|
||||
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
|
||||
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
|
||||
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
|
||||
glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
|
||||
|
||||
std::array<GLuint, 4> new_handles;
|
||||
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
|
||||
|
@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
|
|||
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
|
||||
GL_R8, 0, 1, 0, 6);
|
||||
const std::array texture_handles{
|
||||
null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
|
||||
null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
|
||||
null_image_view_2d_array.handle, null_image_view_cube.handle,
|
||||
null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
|
||||
null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
|
||||
null_image_view_cube.handle,
|
||||
};
|
||||
for (const GLuint handle : texture_handles) {
|
||||
static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
|
||||
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
|
||||
}
|
||||
const auto set_view = [this](ImageViewType type, GLuint handle) {
|
||||
const auto set_view = [this](Shader::TextureType type, GLuint handle) {
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
const std::string name = fmt::format("NullImage {}", type);
|
||||
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
|
||||
}
|
||||
null_image_views[static_cast<size_t>(type)] = handle;
|
||||
};
|
||||
set_view(ImageViewType::e1D, null_image_view_1d.handle);
|
||||
set_view(ImageViewType::e2D, null_image_view_2d.handle);
|
||||
set_view(ImageViewType::Cube, null_image_view_cube.handle);
|
||||
set_view(ImageViewType::e3D, null_image_3d.handle);
|
||||
set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
|
||||
set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
|
||||
set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
|
||||
set_view(ImageViewType::Rect, null_image_rect.handle);
|
||||
set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
|
||||
set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
|
||||
set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
|
||||
set_view(Shader::TextureType::Color3D, null_image_3d.handle);
|
||||
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
|
||||
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
|
||||
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
|
||||
}
|
||||
|
||||
TextureCacheRuntime::~TextureCacheRuntime() = default;
|
||||
|
@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
|
|||
gl_format = GL_RGBA;
|
||||
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
|
||||
} else {
|
||||
const auto& tuple = GetFormatTuple(info.format);
|
||||
const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
|
||||
gl_internal_format = tuple.internal_format;
|
||||
gl_format = tuple.format;
|
||||
gl_type = tuple.type;
|
||||
|
@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
|
|||
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
|
||||
break;
|
||||
case GL_TEXTURE_BUFFER:
|
||||
buffer.Create();
|
||||
glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
|
||||
UNREACHABLE();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
|
||||
|
@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
|
|||
}
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
for (const VideoCommon::BufferCopy& copy : copies) {
|
||||
glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
|
||||
copy.dst_offset, copy.size);
|
||||
}
|
||||
}
|
||||
|
||||
void Image::DownloadMemory(ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
|
||||
|
@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
|||
if (True(image.flags & ImageFlagBits::Converted)) {
|
||||
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
|
||||
} else {
|
||||
internal_format = GetFormatTuple(format).internal_format;
|
||||
internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
|
||||
}
|
||||
VideoCommon::SubresourceRange flatten_range = info.range;
|
||||
std::array<GLuint, 2> handles;
|
||||
|
@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
|||
[[fallthrough]];
|
||||
case ImageViewType::e1D:
|
||||
glGenTextures(2, handles.data());
|
||||
SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
|
||||
SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
|
||||
SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range);
|
||||
SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range);
|
||||
break;
|
||||
case ImageViewType::e2DArray:
|
||||
flatten_range.extent.layers = 1;
|
||||
|
@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
|||
.extent = {.levels = 1, .layers = 1},
|
||||
};
|
||||
glGenTextures(1, handles.data());
|
||||
SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
|
||||
break;
|
||||
SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range);
|
||||
} else {
|
||||
glGenTextures(2, handles.data());
|
||||
SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range);
|
||||
SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info,
|
||||
info.range);
|
||||
}
|
||||
glGenTextures(2, handles.data());
|
||||
SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
|
||||
SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
|
||||
break;
|
||||
case ImageViewType::e3D:
|
||||
glGenTextures(1, handles.data());
|
||||
SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
|
||||
SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range);
|
||||
break;
|
||||
case ImageViewType::CubeArray:
|
||||
flatten_range.extent.layers = 6;
|
||||
[[fallthrough]];
|
||||
case ImageViewType::Cube:
|
||||
glGenTextures(2, handles.data());
|
||||
SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
|
||||
SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
|
||||
SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range);
|
||||
SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range);
|
||||
break;
|
||||
case ImageViewType::Rect:
|
||||
glGenTextures(1, handles.data());
|
||||
SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
|
||||
UNIMPLEMENTED();
|
||||
break;
|
||||
case ImageViewType::Buffer:
|
||||
glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
|
||||
SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
switch (info.type) {
|
||||
case ImageViewType::e1D:
|
||||
default_handle = Handle(Shader::TextureType::Color1D);
|
||||
break;
|
||||
case ImageViewType::e1DArray:
|
||||
default_handle = Handle(Shader::TextureType::ColorArray1D);
|
||||
break;
|
||||
case ImageViewType::e2D:
|
||||
default_handle = Handle(Shader::TextureType::Color2D);
|
||||
break;
|
||||
case ImageViewType::e2DArray:
|
||||
default_handle = Handle(Shader::TextureType::ColorArray2D);
|
||||
break;
|
||||
case ImageViewType::e3D:
|
||||
default_handle = Handle(Shader::TextureType::Color3D);
|
||||
break;
|
||||
case ImageViewType::Cube:
|
||||
default_handle = Handle(Shader::TextureType::ColorCube);
|
||||
break;
|
||||
case ImageViewType::CubeArray:
|
||||
default_handle = Handle(Shader::TextureType::ColorArrayCube);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
default_handle = Handle(info.type);
|
||||
}
|
||||
|
||||
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
||||
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
|
||||
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
|
||||
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
|
||||
|
||||
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
||||
const VideoCommon::ImageViewInfo& view_info)
|
||||
: VideoCommon::ImageViewBase{info, view_info} {}
|
||||
|
@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
|||
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
|
||||
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
|
||||
|
||||
void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
|
||||
void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type,
|
||||
GLuint handle, const VideoCommon::ImageViewInfo& info,
|
||||
VideoCommon::SubresourceRange view_range) {
|
||||
if (info.type == ImageViewType::Buffer) {
|
||||
// TODO: Take offset from buffer cache
|
||||
glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
|
||||
image.guest_size_bytes);
|
||||
} else {
|
||||
const GLuint parent = image.texture.handle;
|
||||
const GLenum target = ImageTarget(view_type, image.info.num_samples);
|
||||
glTextureView(handle, target, parent, internal_format, view_range.base.level,
|
||||
view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
|
||||
if (!info.IsRenderTarget()) {
|
||||
ApplySwizzle(handle, format, info.Swizzle());
|
||||
}
|
||||
const GLuint parent = image.texture.handle;
|
||||
const GLenum target = ImageTarget(view_type, image.info.num_samples);
|
||||
glTextureView(handle, target, parent, internal_format, view_range.base.level,
|
||||
view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
|
||||
if (!info.IsRenderTarget()) {
|
||||
ApplySwizzle(handle, format, info.Swizzle());
|
||||
}
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
const std::string name = VideoCommon::Name(*this, view_type);
|
||||
const std::string name = VideoCommon::Name(*this);
|
||||
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
|
||||
}
|
||||
stored_views.emplace_back().handle = handle;
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
@ -127,13 +128,12 @@ private:
|
|||
OGLTexture null_image_1d_array;
|
||||
OGLTexture null_image_cube_array;
|
||||
OGLTexture null_image_3d;
|
||||
OGLTexture null_image_rect;
|
||||
OGLTextureView null_image_view_1d;
|
||||
OGLTextureView null_image_view_2d;
|
||||
OGLTextureView null_image_view_2d_array;
|
||||
OGLTextureView null_image_view_cube;
|
||||
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
|
||||
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
|
@ -154,8 +154,6 @@ public:
|
|||
void UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
GLuint StorageHandle() noexcept;
|
||||
|
@ -170,7 +168,6 @@ private:
|
|||
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
OGLTexture texture;
|
||||
OGLBuffer buffer;
|
||||
OGLTextureView store_view;
|
||||
GLenum gl_internal_format = GL_NONE;
|
||||
GLenum gl_format = GL_NONE;
|
||||
|
@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase {
|
|||
|
||||
public:
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
|
||||
const VideoCommon::ImageViewInfo&, GPUVAddr);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
||||
const VideoCommon::ImageViewInfo& view_info);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
|
||||
|
||||
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
|
||||
return views[static_cast<size_t>(query_type)];
|
||||
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
|
||||
return views[static_cast<size_t>(handle_type)];
|
||||
}
|
||||
|
||||
[[nodiscard]] GLuint DefaultHandle() const noexcept {
|
||||
|
@ -198,15 +197,25 @@ public:
|
|||
return internal_format;
|
||||
}
|
||||
|
||||
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 BufferSize() const noexcept {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
private:
|
||||
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
|
||||
void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle,
|
||||
const VideoCommon::ImageViewInfo& info,
|
||||
VideoCommon::SubresourceRange view_range);
|
||||
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
|
||||
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
|
||||
std::vector<OGLTextureView> stored_views;
|
||||
GLuint default_handle = 0;
|
||||
GLenum internal_format = GL_NONE;
|
||||
GLuint default_handle = 0;
|
||||
GPUVAddr gpu_addr = 0;
|
||||
u32 buffer_size = 0;
|
||||
};
|
||||
|
||||
class ImageAlloc : public VideoCommon::ImageAllocBase {};
|
||||
|
|
|
@ -5,12 +5,120 @@
|
|||
#pragma once
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace OpenGL::MaxwellToGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct FormatTuple {
|
||||
GLenum internal_format;
|
||||
GLenum format = GL_NONE;
|
||||
GLenum type = GL_NONE;
|
||||
};
|
||||
|
||||
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
|
||||
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
|
||||
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
|
||||
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
|
||||
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
|
||||
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
|
||||
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
|
||||
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
|
||||
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
|
||||
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
|
||||
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
|
||||
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
|
||||
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
|
||||
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
|
||||
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
|
||||
{GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
|
||||
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
|
||||
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
|
||||
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
|
||||
{GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
|
||||
{GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
|
||||
{GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
|
||||
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
|
||||
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
|
||||
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
|
||||
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
|
||||
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
|
||||
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
|
||||
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
|
||||
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
|
||||
{GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
|
||||
{GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
|
||||
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
|
||||
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
|
||||
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
|
||||
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
|
||||
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
|
||||
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
|
||||
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
|
||||
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
|
||||
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
|
||||
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
|
||||
{GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
|
||||
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
|
||||
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
|
||||
{GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
|
||||
{GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
|
||||
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
|
||||
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
|
||||
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
|
||||
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
|
||||
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
|
||||
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
|
||||
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
|
||||
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
|
||||
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
|
||||
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
|
||||
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
|
||||
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
|
||||
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
|
||||
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
|
||||
}};
|
||||
|
||||
inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
|
||||
ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
|
||||
return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
|
||||
}
|
||||
|
||||
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
|
||||
switch (attrib.type) {
|
||||
case Maxwell::VertexAttribute::Type::UnsignedNorm:
|
||||
|
|
|
@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
|||
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
|
||||
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
|
||||
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
|
||||
program_manager{device},
|
||||
rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
|
||||
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
|
@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() {
|
|||
OGLShader fragment_shader;
|
||||
fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
|
||||
|
||||
vertex_program.Create(true, false, vertex_shader.handle);
|
||||
fragment_program.Create(true, false, fragment_shader.handle);
|
||||
|
||||
pipeline.Create();
|
||||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
|
||||
present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle);
|
||||
|
||||
// Generate presentation sampler
|
||||
present_sampler.Create();
|
||||
|
@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||
// Set projection matrix
|
||||
const std::array ortho_matrix =
|
||||
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
|
||||
glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
|
||||
std::data(ortho_matrix));
|
||||
program_manager.BindProgram(present_program.handle);
|
||||
glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
|
||||
|
||||
const auto& texcoords = screen_info.display_texcoords;
|
||||
auto left = texcoords.left;
|
||||
|
@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||
state_tracker.NotifyClipControl();
|
||||
state_tracker.NotifyAlphaTest();
|
||||
|
||||
program_manager.BindHostPipeline(pipeline.handle);
|
||||
|
||||
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||
glEnable(GL_CULL_FACE);
|
||||
if (screen_info.display_srgb) {
|
||||
|
@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
|
||||
program_manager.RestoreGuestPipeline();
|
||||
// TODO
|
||||
// program_manager.RestoreGuestPipeline();
|
||||
}
|
||||
|
||||
void RendererOpenGL::RenderScreenshot() {
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
|
||||
namespace Core {
|
||||
|
@ -111,9 +110,7 @@ private:
|
|||
// OpenGL object IDs
|
||||
OGLSampler present_sampler;
|
||||
OGLBuffer vertex_buffer;
|
||||
OGLProgram vertex_program;
|
||||
OGLProgram fragment_program;
|
||||
OGLPipeline pipeline;
|
||||
OGLProgram present_program;
|
||||
OGLFramebuffer screenshot_framebuffer;
|
||||
|
||||
// GPU address of the vertex buffer
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
||||
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
|
||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
|
@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
|||
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
|
||||
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
|
||||
};
|
||||
program_manager.BindHostCompute(astc_decoder_program.handle);
|
||||
program_manager.BindProgram(astc_decoder_program.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
|
||||
|
||||
|
@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
|
|||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
|
||||
program_manager.BindProgram(block_linear_unswizzle_2d_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
|
@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
|
|||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
|
||||
program_manager.BindProgram(block_linear_unswizzle_3d_program.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
|
@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
|
|||
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
|
||||
"Non-power of two images are not implemented");
|
||||
|
||||
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
|
||||
program_manager.BindProgram(pitch_unswizzle_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||
glUniform2ui(LOC_ORIGIN, 0, 0);
|
||||
glUniform2i(LOC_DESTINATION, 0, 0);
|
||||
|
@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
|
|||
static constexpr GLuint LOC_SRC_OFFSET = 0;
|
||||
static constexpr GLuint LOC_DST_OFFSET = 1;
|
||||
|
||||
program_manager.BindHostCompute(copy_bc4_program.handle);
|
||||
program_manager.BindProgram(copy_bc4_program.handle);
|
||||
|
||||
for (const ImageCopy& copy : copies) {
|
||||
ASSERT(copy.src_subresource.base_layer == 0);
|
||||
|
@ -286,7 +285,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
|
|||
break;
|
||||
case 4: {
|
||||
// BGRA8 copy
|
||||
program_manager.BindHostCompute(copy_bgra_program.handle);
|
||||
program_manager.BindProgram(copy_bgra_program.handle);
|
||||
constexpr GLenum FORMAT = GL_RGBA8;
|
||||
for (const ImageCopy& copy : copies) {
|
||||
ASSERT(copy.src_offset == zero_offset);
|
||||
|
|
|
@ -19,23 +19,6 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
struct TextureHandle {
|
||||
explicit TextureHandle(u32 data, bool via_header_index) {
|
||||
[[likely]] if (via_header_index) {
|
||||
image = data;
|
||||
sampler = data;
|
||||
}
|
||||
else {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
class DescriptorLayoutBuilder {
|
||||
public:
|
||||
DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {}
|
||||
|
|
|
@ -140,8 +140,8 @@ struct BufferCacheParams {
|
|||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
|
||||
static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false;
|
||||
static constexpr bool USE_MEMORY_MAPS = true;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Tegra::Texture::TexturePair;
|
||||
|
||||
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
Common::ThreadWorker* thread_worker, const Shader::Info& info_,
|
||||
|
@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
secondary_offset};
|
||||
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
|
||||
return TextureHandle{lhs_raw | rhs_raw, via_header_index};
|
||||
return TexturePair(lhs_raw | rhs_raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index};
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const TextureHandle handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.image);
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
}
|
||||
}};
|
||||
std::ranges::for_each(info.texture_buffer_descriptors, add_image);
|
||||
std::ranges::for_each(info.image_buffer_descriptors, add_image);
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const TextureHandle handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.image);
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices.push_back(handle.first);
|
||||
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
||||
samplers.push_back(sampler->Handle());
|
||||
}
|
||||
}
|
||||
|
@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
ImageId* texture_buffer_ids{image_view_ids.data()};
|
||||
size_t index{};
|
||||
const auto add_buffer{[&](const auto& desc) {
|
||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||
for (u32 i = 0; i < desc.count; ++i) {
|
||||
bool is_written{false};
|
||||
if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) {
|
||||
if constexpr (is_image) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
|
||||
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
is_written);
|
||||
is_written, is_image);
|
||||
++texture_buffer_ids;
|
||||
++index;
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if defined(_MSC_VER) && defined(NDEBUG)
|
||||
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
|
||||
#else
|
||||
#define LAMBDA_FORCEINLINE
|
||||
|
@ -30,6 +30,7 @@ namespace {
|
|||
using boost::container::small_vector;
|
||||
using boost::container::static_vector;
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Tegra::Texture::TexturePair;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::PixelFormatFromDepthFormat;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
|
@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
|
||||
const u32 raw{lhs_raw | rhs_raw};
|
||||
return TextureHandle{raw, via_header_index};
|
||||
return TexturePair(raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index};
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const TextureHandle handle{read_handle(desc, index)};
|
||||
image_view_indices[image_index++] = handle.image;
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_index++] = handle.first;
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::has_texture_buffers) {
|
||||
|
@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const TextureHandle handle{read_handle(desc, index)};
|
||||
image_view_indices[image_index++] = handle.image;
|
||||
const auto handle{read_handle(desc, index)};
|
||||
image_view_indices[image_index++] = handle.first;
|
||||
|
||||
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)};
|
||||
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
|
||||
samplers[sampler_index++] = sampler->Handle();
|
||||
}
|
||||
}
|
||||
|
@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||
size_t index{};
|
||||
const auto add_buffer{[&](const auto& desc) {
|
||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||
for (u32 i = 0; i < desc.count; ++i) {
|
||||
bool is_written{false};
|
||||
if constexpr (std::is_same_v<decltype(desc), const ImageBufferDescriptor&>) {
|
||||
if constexpr (is_image) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
|
||||
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
is_written);
|
||||
is_written, is_image);
|
||||
++index;
|
||||
++texture_buffer_index;
|
||||
}
|
||||
|
|
|
@ -342,28 +342,15 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
}
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
||||
GraphicsEnvironments environments;
|
||||
GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
|
||||
|
||||
main_pools.ReleaseContents();
|
||||
|
||||
std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs;
|
||||
boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs;
|
||||
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (graphics_key.unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
auto& env{graphics_envs[index]};
|
||||
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
|
||||
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
|
||||
env.SetCachedSize(shader_infos[index]->size_bytes);
|
||||
envs.push_back(&env);
|
||||
}
|
||||
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
|
||||
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
|
||||
if (pipeline_cache_filename.empty()) {
|
||||
return pipeline;
|
||||
}
|
||||
serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] {
|
||||
serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
|
||||
boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
|
||||
env_ptrs;
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
|
|
|
@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
|
|||
return scissor;
|
||||
}
|
||||
|
||||
struct TextureHandle {
|
||||
constexpr TextureHandle(u32 data, bool via_header_index) {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
|
||||
bool is_indexed) {
|
||||
DrawParams params{
|
||||
|
|
|
@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() {
|
|||
return MakeShaderInfo(env, *cpu_shader_addr);
|
||||
}
|
||||
|
||||
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
|
||||
const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
|
||||
size_t env_index{};
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
|
||||
if (unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
|
||||
auto& env{result.envs[index]};
|
||||
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
|
||||
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
|
||||
env.SetCachedSize(shader_infos[index]->size_bytes);
|
||||
result.env_ptrs[env_index++] = &env;
|
||||
}
|
||||
}
|
||||
|
||||
ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
|
|
|
@ -4,14 +4,18 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/shader_environment.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
|
@ -30,6 +34,8 @@ class ShaderCache {
|
|||
static constexpr u64 PAGE_BITS = 14;
|
||||
static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
|
||||
|
||||
static constexpr size_t NUM_PROGRAMS = 6;
|
||||
|
||||
struct Entry {
|
||||
VAddr addr_start;
|
||||
VAddr addr_end;
|
||||
|
@ -58,6 +64,15 @@ public:
|
|||
void SyncGuestHost();
|
||||
|
||||
protected:
|
||||
struct GraphicsEnvironments {
|
||||
std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
|
||||
std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
|
||||
|
||||
std::span<Shader::Environment* const> Span() const noexcept {
|
||||
return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
|
||||
}
|
||||
};
|
||||
|
||||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_);
|
||||
|
@ -65,17 +80,21 @@ protected:
|
|||
/// @brief Update the hashes and information of shader stages
|
||||
/// @param unique_hashes Shader hashes to store into when a stage is enabled
|
||||
/// @return True no success, false on error
|
||||
bool RefreshStages(std::array<u64, 6>& unique_hashes);
|
||||
bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
|
||||
|
||||
/// @brief Returns information about the current compute shader
|
||||
/// @return Pointer to a valid shader, nullptr on error
|
||||
const ShaderInfo* ComputeShader();
|
||||
|
||||
/// @brief Collect the current graphics environments
|
||||
void GetGraphicsEnvironments(GraphicsEnvironments& result,
|
||||
const std::array<u64, NUM_PROGRAMS>& unique_hashes);
|
||||
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
|
||||
std::array<const ShaderInfo*, 6> shader_infos{};
|
||||
std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
|
||||
bool last_shaders_valid = false;
|
||||
|
||||
private:
|
||||
|
|
|
@ -187,8 +187,8 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
|
|||
|
||||
Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
|
||||
bool via_header_index, u32 raw) {
|
||||
const TextureHandle handle{raw, via_header_index};
|
||||
const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
|
||||
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
|
||||
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
|
||||
Tegra::Texture::TICEntry entry;
|
||||
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
||||
const Shader::TextureType result{ConvertType(entry)};
|
||||
|
|
|
@ -29,22 +29,6 @@ class Memorymanager;
|
|||
|
||||
namespace VideoCommon {
|
||||
|
||||
struct TextureHandle {
|
||||
explicit TextureHandle(u32 data, bool via_header_index) {
|
||||
if (via_header_index) {
|
||||
image = data;
|
||||
sampler = data;
|
||||
} else {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
class GenericEnvironment : public Shader::Environment {
|
||||
public:
|
||||
explicit GenericEnvironment() = default;
|
||||
|
|
|
@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
|
|||
return "Invalid";
|
||||
}
|
||||
|
||||
std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
|
||||
std::string Name(const ImageViewBase& image_view) {
|
||||
const u32 width = image_view.size.width;
|
||||
const u32 height = image_view.size.height;
|
||||
const u32 depth = image_view.size.depth;
|
||||
|
@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
|
|||
const u32 num_layers = image_view.range.extent.layers;
|
||||
|
||||
const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
|
||||
switch (type.value_or(image_view.type)) {
|
||||
switch (image_view.type) {
|
||||
case ImageViewType::e1D:
|
||||
return fmt::format("ImageView 1D {}{}", width, level);
|
||||
case ImageViewType::e2D:
|
||||
|
|
|
@ -255,8 +255,7 @@ struct RenderTargets;
|
|||
|
||||
[[nodiscard]] std::string Name(const ImageBase& image);
|
||||
|
||||
[[nodiscard]] std::string Name(const ImageViewBase& image_view,
|
||||
std::optional<ImageViewType> type = std::nullopt);
|
||||
[[nodiscard]] std::string Name(const ImageViewBase& image_view);
|
||||
|
||||
[[nodiscard]] std::string Name(const RenderTargets& render_targets);
|
||||
|
||||
|
|
|
@ -154,6 +154,15 @@ union TextureHandle {
|
|||
};
|
||||
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
|
||||
|
||||
[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
|
||||
if (via_header_index) {
|
||||
return {raw, raw};
|
||||
} else {
|
||||
const Tegra::Texture::TextureHandle handle{raw};
|
||||
return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
|
||||
}
|
||||
}
|
||||
|
||||
struct TICEntry {
|
||||
union {
|
||||
struct {
|
||||
|
|
|
@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
|
||||
.pNext = nullptr,
|
||||
.storageBuffer16BitAccess = false,
|
||||
.storageBuffer16BitAccess = true,
|
||||
.uniformAndStorageBuffer16BitAccess = true,
|
||||
.storagePushConstant16 = false,
|
||||
.storageInputOutput16 = false,
|
||||
|
|
Loading…
Reference in a new issue