Merge pull request #4105 from ReinUsesLisp/resident-buffers
gl_rasterizer: Use NV_vertex_buffer_unified_memory for vertex buffer robustness
This commit is contained in:
commit
0e1268e507
14 changed files with 277 additions and 213 deletions
|
@ -41,7 +41,11 @@ class BufferCache {
|
||||||
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
|
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using BufferInfo = std::pair<BufferType, u64>;
|
struct BufferInfo {
|
||||||
|
BufferType handle;
|
||||||
|
u64 offset;
|
||||||
|
u64 address;
|
||||||
|
};
|
||||||
|
|
||||||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
||||||
bool is_written = false, bool use_fast_cbuf = false) {
|
bool is_written = false, bool use_fast_cbuf = false) {
|
||||||
|
@ -50,7 +54,7 @@ public:
|
||||||
auto& memory_manager = system.GPU().MemoryManager();
|
auto& memory_manager = system.GPU().MemoryManager();
|
||||||
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
|
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||||
if (!cpu_addr_opt) {
|
if (!cpu_addr_opt) {
|
||||||
return {GetEmptyBuffer(size), 0};
|
return GetEmptyBuffer(size);
|
||||||
}
|
}
|
||||||
const VAddr cpu_addr = *cpu_addr_opt;
|
const VAddr cpu_addr = *cpu_addr_opt;
|
||||||
|
|
||||||
|
@ -88,7 +92,7 @@ public:
|
||||||
Buffer* const block = GetBlock(cpu_addr, size);
|
Buffer* const block = GetBlock(cpu_addr, size);
|
||||||
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
||||||
if (!map) {
|
if (!map) {
|
||||||
return {GetEmptyBuffer(size), 0};
|
return GetEmptyBuffer(size);
|
||||||
}
|
}
|
||||||
if (is_written) {
|
if (is_written) {
|
||||||
map->MarkAsModified(true, GetModifiedTicks());
|
map->MarkAsModified(true, GetModifiedTicks());
|
||||||
|
@ -101,7 +105,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))};
|
return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
||||||
|
@ -254,27 +258,17 @@ public:
|
||||||
committed_flushes.pop_front();
|
committed_flushes.pop_front();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
|
virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
std::unique_ptr<StreamBuffer> stream_buffer_)
|
std::unique_ptr<StreamBuffer> stream_buffer)
|
||||||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)},
|
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
|
||||||
stream_buffer_handle{stream_buffer->Handle()} {}
|
|
||||||
|
|
||||||
~BufferCache() = default;
|
~BufferCache() = default;
|
||||||
|
|
||||||
virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
||||||
|
|
||||||
virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) = 0;
|
|
||||||
|
|
||||||
virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) = 0;
|
|
||||||
|
|
||||||
virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) = 0;
|
|
||||||
|
|
||||||
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
|
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -336,11 +330,11 @@ private:
|
||||||
const VAddr cpu_addr_end = cpu_addr + size;
|
const VAddr cpu_addr_end = cpu_addr + size;
|
||||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
||||||
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
|
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||||
UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr);
|
block->Upload(block->Offset(cpu_addr), size, host_ptr);
|
||||||
} else {
|
} else {
|
||||||
staging_buffer.resize(size);
|
staging_buffer.resize(size);
|
||||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
||||||
UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data());
|
block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
|
||||||
}
|
}
|
||||||
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
|
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
|
||||||
}
|
}
|
||||||
|
@ -399,7 +393,7 @@ private:
|
||||||
}
|
}
|
||||||
staging_buffer.resize(size);
|
staging_buffer.resize(size);
|
||||||
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
||||||
UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data());
|
block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -436,7 +430,7 @@ private:
|
||||||
|
|
||||||
const std::size_t size = map->end - map->start;
|
const std::size_t size = map->end - map->start;
|
||||||
staging_buffer.resize(size);
|
staging_buffer.resize(size);
|
||||||
DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data());
|
block->Download(block->Offset(map->start), size, staging_buffer.data());
|
||||||
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
|
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
|
||||||
map->MarkAsModified(false, 0);
|
map->MarkAsModified(false, 0);
|
||||||
}
|
}
|
||||||
|
@ -449,7 +443,7 @@ private:
|
||||||
|
|
||||||
buffer_ptr += size;
|
buffer_ptr += size;
|
||||||
buffer_offset += size;
|
buffer_offset += size;
|
||||||
return {stream_buffer_handle, uploaded_offset};
|
return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
|
||||||
}
|
}
|
||||||
|
|
||||||
void AlignBuffer(std::size_t alignment) {
|
void AlignBuffer(std::size_t alignment) {
|
||||||
|
@ -464,7 +458,7 @@ private:
|
||||||
const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
|
const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
|
||||||
const VAddr cpu_addr = buffer->CpuAddr();
|
const VAddr cpu_addr = buffer->CpuAddr();
|
||||||
std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
|
std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
|
||||||
CopyBlock(*buffer, *new_buffer, 0, 0, old_size);
|
new_buffer->CopyFrom(*buffer, 0, 0, old_size);
|
||||||
QueueDestruction(std::move(buffer));
|
QueueDestruction(std::move(buffer));
|
||||||
|
|
||||||
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
|
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
|
||||||
|
@ -486,8 +480,8 @@ private:
|
||||||
const std::size_t new_size = size_1 + size_2;
|
const std::size_t new_size = size_1 + size_2;
|
||||||
|
|
||||||
std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
|
std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
|
||||||
CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1);
|
new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
|
||||||
CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2);
|
new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
|
||||||
QueueDestruction(std::move(first));
|
QueueDestruction(std::move(first));
|
||||||
QueueDestruction(std::move(second));
|
QueueDestruction(std::move(second));
|
||||||
|
|
||||||
|
|
|
@ -22,21 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||||
|
|
||||||
Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} {
|
Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
|
||||||
|
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||||
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
||||||
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Buffer::~Buffer() = default;
|
Buffer::~Buffer() = default;
|
||||||
|
|
||||||
|
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
|
||||||
|
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
|
||||||
|
data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||||
|
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||||
|
glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
|
||||||
|
data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
|
std::size_t size) const {
|
||||||
|
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
|
||||||
|
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
||||||
|
}
|
||||||
|
|
||||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
const Device& device, std::size_t stream_size)
|
const Device& device_, std::size_t stream_size)
|
||||||
: GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
|
: GenericBufferCache{rasterizer, system,
|
||||||
|
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
|
||||||
|
device{device_} {
|
||||||
if (!device.HasFastBufferSubData()) {
|
if (!device.HasFastBufferSubData()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
||||||
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||||
for (const GLuint cbuf : cbufs) {
|
for (const GLuint cbuf : cbufs) {
|
||||||
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
||||||
|
@ -48,39 +73,20 @@ OGLBufferCache::~OGLBufferCache() {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||||
return std::make_shared<Buffer>(cpu_addr, size);
|
return std::make_shared<Buffer>(device, cpu_addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||||
return 0;
|
return {0, 0, 0};
|
||||||
}
|
|
||||||
|
|
||||||
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) {
|
|
||||||
glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
|
|
||||||
static_cast<GLsizeiptr>(size), data);
|
|
||||||
}
|
|
||||||
|
|
||||||
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) {
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
|
||||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
|
||||||
glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
|
|
||||||
static_cast<GLsizeiptr>(size), data);
|
|
||||||
}
|
|
||||||
|
|
||||||
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) {
|
|
||||||
glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset),
|
|
||||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
||||||
std::size_t size) {
|
std::size_t size) {
|
||||||
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
||||||
const GLuint cbuf = cbufs[cbuf_cursor++];
|
const GLuint cbuf = cbufs[cbuf_cursor++];
|
||||||
|
|
||||||
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
||||||
return {cbuf, 0};
|
return {cbuf, 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -25,15 +25,27 @@ class RasterizerOpenGL;
|
||||||
|
|
||||||
class Buffer : public VideoCommon::BufferBlock {
|
class Buffer : public VideoCommon::BufferBlock {
|
||||||
public:
|
public:
|
||||||
explicit Buffer(VAddr cpu_addr, const std::size_t size);
|
explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
|
||||||
~Buffer();
|
~Buffer();
|
||||||
|
|
||||||
GLuint Handle() const {
|
void Upload(std::size_t offset, std::size_t size, const u8* data) const;
|
||||||
|
|
||||||
|
void Download(std::size_t offset, std::size_t size, u8* data) const;
|
||||||
|
|
||||||
|
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
|
std::size_t size) const;
|
||||||
|
|
||||||
|
GLuint Handle() const noexcept {
|
||||||
return gl_buffer.handle;
|
return gl_buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const noexcept {
|
||||||
|
return gpu_address;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
|
u64 gpu_address = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||||
|
@ -43,7 +55,7 @@ public:
|
||||||
const Device& device, std::size_t stream_size);
|
const Device& device, std::size_t stream_size);
|
||||||
~OGLBufferCache();
|
~OGLBufferCache();
|
||||||
|
|
||||||
GLuint GetEmptyBuffer(std::size_t) override;
|
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||||
|
|
||||||
void Acquire() noexcept {
|
void Acquire() noexcept {
|
||||||
cbuf_cursor = 0;
|
cbuf_cursor = 0;
|
||||||
|
@ -52,22 +64,16 @@ public:
|
||||||
protected:
|
protected:
|
||||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||||
|
|
||||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) override;
|
|
||||||
|
|
||||||
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) override;
|
|
||||||
|
|
||||||
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) override;
|
|
||||||
|
|
||||||
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||||
|
|
||||||
|
const Device& device;
|
||||||
|
|
||||||
std::size_t cbuf_cursor = 0;
|
std::size_t cbuf_cursor = 0;
|
||||||
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
std::array<GLuint, NUM_CBUFS> cbufs{};
|
||||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
|
|
||||||
cbufs;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -188,16 +188,32 @@ bool IsASTCSupported() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// @brief Returns true when a GL_RENDERER is a Turing GPU
|
||||||
|
/// @param renderer GL_RENDERER string
|
||||||
|
bool IsTuring(std::string_view renderer) {
|
||||||
|
static constexpr std::array<std::string_view, 12> TURING_GPUS = {
|
||||||
|
"GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070",
|
||||||
|
"RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000",
|
||||||
|
"Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4",
|
||||||
|
};
|
||||||
|
return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(),
|
||||||
|
[renderer](std::string_view candidate) {
|
||||||
|
return renderer.find(candidate) != std::string_view::npos;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
Device::Device()
|
Device::Device()
|
||||||
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
|
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
|
||||||
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||||
|
const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||||
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
||||||
const std::vector extensions = GetExtensions();
|
const std::vector extensions = GetExtensions();
|
||||||
|
|
||||||
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
||||||
const bool is_amd = vendor == "ATI Technologies Inc.";
|
const bool is_amd = vendor == "ATI Technologies Inc.";
|
||||||
|
const bool is_turing = is_nvidia && IsTuring(renderer);
|
||||||
|
|
||||||
bool disable_fast_buffer_sub_data = false;
|
bool disable_fast_buffer_sub_data = false;
|
||||||
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
|
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
|
||||||
|
@ -221,8 +237,16 @@ Device::Device()
|
||||||
has_variable_aoffi = TestVariableAoffi();
|
has_variable_aoffi = TestVariableAoffi();
|
||||||
has_component_indexing_bug = is_amd;
|
has_component_indexing_bug = is_amd;
|
||||||
has_precise_bug = TestPreciseBug();
|
has_precise_bug = TestPreciseBug();
|
||||||
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
|
|
||||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||||
|
|
||||||
|
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
||||||
|
// uniform buffers as "push constants"
|
||||||
|
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
|
||||||
|
|
||||||
|
// Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on
|
||||||
|
// DeleteBuffers. Disable unified memory on these devices.
|
||||||
|
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing;
|
||||||
|
|
||||||
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
||||||
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
|
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
|
||||||
GLAD_GL_NV_transform_feedback2;
|
GLAD_GL_NV_transform_feedback2;
|
||||||
|
|
|
@ -72,6 +72,10 @@ public:
|
||||||
return has_texture_shadow_lod;
|
return has_texture_shadow_lod;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool HasVertexBufferUnifiedMemory() const {
|
||||||
|
return has_vertex_buffer_unified_memory;
|
||||||
|
}
|
||||||
|
|
||||||
bool HasASTC() const {
|
bool HasASTC() const {
|
||||||
return has_astc;
|
return has_astc;
|
||||||
}
|
}
|
||||||
|
@ -115,6 +119,7 @@ private:
|
||||||
bool has_vertex_viewport_layer{};
|
bool has_vertex_viewport_layer{};
|
||||||
bool has_image_load_formatted{};
|
bool has_image_load_formatted{};
|
||||||
bool has_texture_shadow_lod{};
|
bool has_texture_shadow_lod{};
|
||||||
|
bool has_vertex_buffer_unified_memory{};
|
||||||
bool has_astc{};
|
bool has_astc{};
|
||||||
bool has_variable_aoffi{};
|
bool has_variable_aoffi{};
|
||||||
bool has_component_indexing_bug{};
|
bool has_component_indexing_bug{};
|
||||||
|
|
|
@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||||
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
|
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
|
||||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
||||||
|
|
||||||
constexpr std::size_t NumSupportedVertexAttributes = 16;
|
constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||||
|
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
|
||||||
|
|
||||||
template <typename Engine, typename Entry>
|
template <typename Engine, typename Entry>
|
||||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||||
|
@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
|
||||||
// avoid OpenGL errors.
|
// avoid OpenGL errors.
|
||||||
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
|
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
|
||||||
// assume every shader uses them all.
|
// assume every shader uses them all.
|
||||||
for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
|
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
|
||||||
if (!flags[Dirty::VertexFormat0 + index]) {
|
if (!flags[Dirty::VertexFormat0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -231,9 +232,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||||
|
|
||||||
|
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
|
||||||
|
|
||||||
// Upload all guest vertex arrays sequentially to our buffer
|
// Upload all guest vertex arrays sequentially to our buffer
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
|
||||||
if (!flags[Dirty::VertexBuffer0 + index]) {
|
if (!flags[Dirty::VertexBuffer0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -246,16 +249,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {
|
||||||
|
|
||||||
const GPUVAddr start = vertex_array.StartAddress();
|
const GPUVAddr start = vertex_array.StartAddress();
|
||||||
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
||||||
|
|
||||||
ASSERT(end >= start);
|
ASSERT(end >= start);
|
||||||
|
|
||||||
|
const GLuint gl_index = static_cast<GLuint>(index);
|
||||||
const u64 size = end - start;
|
const u64 size = end - start;
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
|
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||||
|
if (use_unified_memory) {
|
||||||
|
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
|
const auto info = buffer_cache.UploadMemory(start, size);
|
||||||
glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
|
if (use_unified_memory) {
|
||||||
vertex_array.stride);
|
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||||
|
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
|
||||||
|
info.address + info.offset, size);
|
||||||
|
} else {
|
||||||
|
glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -268,7 +280,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
|
||||||
flags[Dirty::VertexInstances] = false;
|
flags[Dirty::VertexInstances] = false;
|
||||||
|
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
|
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
|
||||||
if (!flags[Dirty::VertexInstance0 + index]) {
|
if (!flags[Dirty::VertexInstance0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -285,9 +297,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Index);
|
MICROPROFILE_SCOPE(OpenGL_Index);
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
const std::size_t size = CalculateIndexBufferSize();
|
const std::size_t size = CalculateIndexBufferSize();
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
||||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
|
||||||
return offset;
|
return info.offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
|
@ -643,9 +655,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
if (!device.UseAssemblyShaders()) {
|
if (!device.UseAssemblyShaders()) {
|
||||||
MaxwellUniformData ubo;
|
MaxwellUniformData ubo;
|
||||||
ubo.SetFromRegs(gpu);
|
ubo.SetFromRegs(gpu);
|
||||||
const auto [buffer, offset] =
|
const auto info =
|
||||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
|
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
|
||||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -956,8 +968,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
|
||||||
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -970,24 +981,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
|
|
||||||
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
||||||
const GPUVAddr gpu_addr = buffer.address;
|
const GPUVAddr gpu_addr = buffer.address;
|
||||||
auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
||||||
|
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
UNIMPLEMENTED_IF(use_unified);
|
UNIMPLEMENTED_IF(use_unified);
|
||||||
if (offset != 0) {
|
if (info.offset != 0) {
|
||||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||||
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
|
||||||
cbuf = staging_cbuf;
|
info.handle = staging_cbuf;
|
||||||
offset = 0;
|
info.offset = 0;
|
||||||
}
|
}
|
||||||
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_unified) {
|
if (use_unified) {
|
||||||
glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
|
glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
|
||||||
|
unified_offset, size);
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1023,9 +1035,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||||
GPUVAddr gpu_addr, std::size_t size) {
|
GPUVAddr gpu_addr, std::size_t size) {
|
||||||
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
||||||
const auto [ssbo, buffer_offset] =
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||||
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
|
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1712,8 +1723,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
|
||||||
const GLuint handle = transform_feedback_buffers[index].handle;
|
const GLuint handle = transform_feedback_buffers[index].handle;
|
||||||
const GPUVAddr gpu_addr = binding.Address();
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
const std::size_t size = binding.buffer_size;
|
const std::size_t size = binding.buffer_size;
|
||||||
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
|
glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
|
||||||
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,11 +2,13 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <deque>
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||||
|
@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
|
OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
|
||||||
bool use_persistent)
|
|
||||||
: buffer_size(size) {
|
: buffer_size(size) {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
|
|
||||||
|
@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
|
||||||
allocate_size *= 2;
|
allocate_size *= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_persistent) {
|
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||||
persistent = true;
|
|
||||||
coherent = prefer_coherent;
|
|
||||||
const GLbitfield flags =
|
|
||||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
|
|
||||||
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
||||||
mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
|
mapped_ptr = static_cast<u8*>(
|
||||||
gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
|
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||||
} else {
|
|
||||||
glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||||
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||||
if (persistent) {
|
|
||||||
glUnmapNamedBuffer(gl_buffer.handle);
|
glUnmapNamedBuffer(gl_buffer.handle);
|
||||||
}
|
|
||||||
gl_buffer.Release();
|
gl_buffer.Release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
|
||||||
|
|
||||||
bool invalidate = false;
|
bool invalidate = false;
|
||||||
if (buffer_pos + size > buffer_size) {
|
if (buffer_pos + size > buffer_size) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||||
|
glInvalidateBufferData(gl_buffer.handle);
|
||||||
|
|
||||||
buffer_pos = 0;
|
buffer_pos = 0;
|
||||||
invalidate = true;
|
invalidate = true;
|
||||||
|
|
||||||
if (persistent) {
|
|
||||||
glUnmapNamedBuffer(gl_buffer.handle);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (invalidate || !persistent) {
|
return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
|
||||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
|
||||||
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
|
||||||
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
|
|
||||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
|
||||||
mapped_ptr = static_cast<u8*>(
|
|
||||||
glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
|
|
||||||
mapped_offset = buffer_pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||||
ASSERT(size <= mapped_size);
|
ASSERT(size <= mapped_size);
|
||||||
|
|
||||||
if (!coherent && size > 0) {
|
if (size > 0) {
|
||||||
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size);
|
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
|
||||||
}
|
|
||||||
|
|
||||||
if (!persistent) {
|
|
||||||
glUnmapNamedBuffer(gl_buffer.handle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer_pos += size;
|
buffer_pos += size;
|
||||||
|
|
|
@ -11,10 +11,11 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
|
||||||
class OGLStreamBuffer : private NonCopyable {
|
class OGLStreamBuffer : private NonCopyable {
|
||||||
public:
|
public:
|
||||||
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
|
explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
|
||||||
bool use_persistent = true);
|
|
||||||
~OGLStreamBuffer();
|
~OGLStreamBuffer();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -33,19 +34,20 @@ public:
|
||||||
return gl_buffer.handle;
|
return gl_buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLsizeiptr Size() const {
|
u64 Address() const {
|
||||||
|
return gpu_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
GLsizeiptr Size() const noexcept {
|
||||||
return buffer_size;
|
return buffer_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
|
|
||||||
bool coherent = false;
|
GLuint64EXT gpu_address = 0;
|
||||||
bool persistent = false;
|
|
||||||
|
|
||||||
GLintptr buffer_pos = 0;
|
GLintptr buffer_pos = 0;
|
||||||
GLsizeiptr buffer_size = 0;
|
GLsizeiptr buffer_size = 0;
|
||||||
GLintptr mapped_offset = 0;
|
|
||||||
GLsizeiptr mapped_size = 0;
|
GLsizeiptr mapped_size = 0;
|
||||||
u8* mapped_ptr = nullptr;
|
u8* mapped_ptr = nullptr;
|
||||||
};
|
};
|
||||||
|
|
|
@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {
|
||||||
|
|
||||||
// Clear screen to black
|
// Clear screen to black
|
||||||
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
||||||
|
|
||||||
|
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||||
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||||
|
|
||||||
|
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
|
||||||
|
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
|
||||||
|
&vertex_buffer_address);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererOpenGL::AddTelemetryFields() {
|
void RendererOpenGL::AddTelemetryFields() {
|
||||||
|
@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||||
offsetof(ScreenRectVertex, tex_coord));
|
offsetof(ScreenRectVertex, tex_coord));
|
||||||
glVertexAttribBinding(PositionLocation, 0);
|
glVertexAttribBinding(PositionLocation, 0);
|
||||||
glVertexAttribBinding(TexCoordLocation, 0);
|
glVertexAttribBinding(TexCoordLocation, 0);
|
||||||
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
|
||||||
|
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
|
||||||
|
sizeof(vertices));
|
||||||
|
} else {
|
||||||
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
|
||||||
|
}
|
||||||
|
|
||||||
glBindTextureUnit(0, screen_info.display_texture);
|
glBindTextureUnit(0, screen_info.display_texture);
|
||||||
glBindSampler(0, 0);
|
glBindSampler(0, 0);
|
||||||
|
|
|
@ -107,6 +107,9 @@ private:
|
||||||
OGLPipeline pipeline;
|
OGLPipeline pipeline;
|
||||||
OGLFramebuffer screenshot_framebuffer;
|
OGLFramebuffer screenshot_framebuffer;
|
||||||
|
|
||||||
|
// GPU address of the vertex buffer
|
||||||
|
GLuint64EXT vertex_buffer_address = 0;
|
||||||
|
|
||||||
/// Display information for Switch screen
|
/// Display information for Switch screen
|
||||||
ScreenInfo screen_info;
|
ScreenInfo screen_info;
|
||||||
|
|
||||||
|
|
|
@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
|
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
|
||||||
std::size_t size)
|
VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
|
||||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
: VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
|
||||||
VkBufferCreateInfo ci;
|
VkBufferCreateInfo ci;
|
||||||
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||||
ci.pNext = nullptr;
|
ci.pNext = nullptr;
|
||||||
|
@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp
|
||||||
|
|
||||||
Buffer::~Buffer() = default;
|
Buffer::~Buffer() = default;
|
||||||
|
|
||||||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
|
||||||
const VKDevice& device, VKMemoryManager& memory_manager,
|
|
||||||
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
|
|
||||||
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
|
|
||||||
CreateStreamBuffer(device,
|
|
||||||
scheduler)},
|
|
||||||
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
|
|
||||||
staging_pool} {}
|
|
||||||
|
|
||||||
VKBufferCache::~VKBufferCache() = default;
|
|
||||||
|
|
||||||
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
|
||||||
return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
|
||||||
size = std::max(size, std::size_t(4));
|
|
||||||
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
|
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
|
||||||
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
|
|
||||||
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
|
||||||
});
|
|
||||||
return *empty.handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) {
|
|
||||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
std::memcpy(staging.commit->Map(size), data, size);
|
std::memcpy(staging.commit->Map(size), data, size);
|
||||||
|
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
|
|
||||||
size](vk::CommandBuffer cmdbuf) {
|
const VkBuffer handle = Handle();
|
||||||
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
|
scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
|
||||||
|
|
||||||
VkBufferMemoryBarrier barrier;
|
VkBufferMemoryBarrier barrier;
|
||||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
|
@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
|
||||||
barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
|
barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
|
||||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.buffer = buffer;
|
barrier.buffer = handle;
|
||||||
barrier.offset = offset;
|
barrier.offset = offset;
|
||||||
barrier.size = size;
|
barrier.size = size;
|
||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
|
||||||
|
@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
|
||||||
u8* data) {
|
|
||||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
|
|
||||||
size](vk::CommandBuffer cmdbuf) {
|
const VkBuffer handle = Handle();
|
||||||
|
scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
|
||||||
VkBufferMemoryBarrier barrier;
|
VkBufferMemoryBarrier barrier;
|
||||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
barrier.pNext = nullptr;
|
barrier.pNext = nullptr;
|
||||||
|
@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||||
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
barrier.buffer = buffer;
|
barrier.buffer = handle;
|
||||||
barrier.offset = offset;
|
barrier.offset = offset;
|
||||||
barrier.size = size;
|
barrier.size = size;
|
||||||
|
|
||||||
|
@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
|
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
|
||||||
cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
|
cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
|
||||||
});
|
});
|
||||||
scheduler.Finish();
|
scheduler.Finish();
|
||||||
|
|
||||||
std::memcpy(data, staging.commit->Map(size), size);
|
std::memcpy(data, staging.commit->Map(size), size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
std::size_t dst_offset, std::size_t size) {
|
std::size_t size) const {
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset,
|
|
||||||
|
const VkBuffer dst_buffer = Handle();
|
||||||
|
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
|
||||||
size](vk::CommandBuffer cmdbuf) {
|
size](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
|
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
|
||||||
|
|
||||||
|
@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
|
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||||
|
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
|
||||||
|
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
|
||||||
|
CreateStreamBuffer(device,
|
||||||
|
scheduler)},
|
||||||
|
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
|
||||||
|
staging_pool} {}
|
||||||
|
|
||||||
|
VKBufferCache::~VKBufferCache() = default;
|
||||||
|
|
||||||
|
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||||
|
return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
|
||||||
|
size);
|
||||||
|
}
|
||||||
|
|
||||||
|
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||||
|
size = std::max(size, std::size_t(4));
|
||||||
|
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
||||||
|
});
|
||||||
|
return {*empty.handle, 0, 0};
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -25,15 +25,29 @@ class VKScheduler;
|
||||||
|
|
||||||
class Buffer final : public VideoCommon::BufferBlock {
|
class Buffer final : public VideoCommon::BufferBlock {
|
||||||
public:
|
public:
|
||||||
explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
|
explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
|
||||||
std::size_t size);
|
VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
|
||||||
~Buffer();
|
~Buffer();
|
||||||
|
|
||||||
|
void Upload(std::size_t offset, std::size_t size, const u8* data) const;
|
||||||
|
|
||||||
|
void Download(std::size_t offset, std::size_t size, u8* data) const;
|
||||||
|
|
||||||
|
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||||
|
std::size_t size) const;
|
||||||
|
|
||||||
VkBuffer Handle() const {
|
VkBuffer Handle() const {
|
||||||
return *buffer.handle;
|
return *buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
VKStagingBufferPool& staging_pool;
|
||||||
|
|
||||||
VKBuffer buffer;
|
VKBuffer buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -44,20 +58,11 @@ public:
|
||||||
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
|
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
|
||||||
~VKBufferCache();
|
~VKBufferCache();
|
||||||
|
|
||||||
VkBuffer GetEmptyBuffer(std::size_t size) override;
|
BufferInfo GetEmptyBuffer(std::size_t size) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||||
|
|
||||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
const u8* data) override;
|
|
||||||
|
|
||||||
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
|
||||||
u8* data) override;
|
|
||||||
|
|
||||||
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
|
||||||
std::size_t dst_offset, std::size_t size) override;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const VKDevice& device;
|
const VKDevice& device;
|
||||||
VKMemoryManager& memory_manager;
|
VKMemoryManager& memory_manager;
|
||||||
|
|
|
@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
|
||||||
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||||
|
|
||||||
const GPUVAddr gpu_addr = binding.Address();
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
const auto size = static_cast<VkDeviceSize>(binding.buffer_size);
|
const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
|
|
||||||
scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
|
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
|
||||||
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
||||||
});
|
});
|
||||||
|
@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
|
||||||
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
|
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
|
const auto info = buffer_cache.UploadMemory(start, size);
|
||||||
buffer_bindings.AddVertexBinding(buffer, offset);
|
buffer_bindings.AddVertexBinding(info.handle, info.offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||||
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||||
|
VkBuffer buffer = info.handle;
|
||||||
|
u64 offset = info.offset;
|
||||||
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
||||||
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
||||||
|
|
||||||
|
@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||||
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||||
|
VkBuffer buffer = info.handle;
|
||||||
|
u64 offset = info.offset;
|
||||||
|
|
||||||
auto format = regs.index_array.format;
|
auto format = regs.index_array.format;
|
||||||
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
|
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
|
||||||
|
@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
|
||||||
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
|
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
|
||||||
ASSERT(size <= MaxConstbufferSize);
|
ASSERT(size <= MaxConstbufferSize);
|
||||||
|
|
||||||
const auto [buffer_handle, offset] =
|
const auto info =
|
||||||
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
|
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
|
||||||
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||||
update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
|
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
|
||||||
|
@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
|
||||||
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
|
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
|
||||||
// default buffer.
|
// default buffer.
|
||||||
static constexpr std::size_t dummy_size = 4;
|
static constexpr std::size_t dummy_size = 4;
|
||||||
const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
|
const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
|
||||||
update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(
|
const auto info = buffer_cache.UploadMemory(
|
||||||
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
|
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
|
||||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
|
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
|
||||||
|
|
|
@ -35,10 +35,14 @@ public:
|
||||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||||
void Unmap(u64 size);
|
void Unmap(u64 size);
|
||||||
|
|
||||||
VkBuffer Handle() const {
|
VkBuffer Handle() const noexcept {
|
||||||
return *buffer;
|
return *buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const noexcept {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct Watch final {
|
struct Watch final {
|
||||||
VKFenceWatch fence;
|
VKFenceWatch fence;
|
||||||
|
|
Loading…
Reference in a new issue