1
1
Fork 0
forked from suyu/suyu

Merge pull request #12761 from liamwhite/mp-composite

video_core: rewrite presentation for layer composition
This commit is contained in:
Fernando S 2024-02-02 15:08:06 +01:00 committed by GitHub
commit 58cf2ee1f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
90 changed files with 3671 additions and 3501 deletions

View file

@ -775,6 +775,9 @@ add_library(core STATIC
hle/service/nvnflinger/graphic_buffer_producer.h hle/service/nvnflinger/graphic_buffer_producer.h
hle/service/nvnflinger/hos_binder_driver_server.cpp hle/service/nvnflinger/hos_binder_driver_server.cpp
hle/service/nvnflinger/hos_binder_driver_server.h hle/service/nvnflinger/hos_binder_driver_server.h
hle/service/nvnflinger/hardware_composer.cpp
hle/service/nvnflinger/hardware_composer.h
hle/service/nvnflinger/hwc_layer.h
hle/service/nvnflinger/nvnflinger.cpp hle/service/nvnflinger/nvnflinger.cpp
hle/service/nvnflinger/nvnflinger.h hle/service/nvnflinger/nvnflinger.h
hle/service/nvnflinger/parcel.h hle/service/nvnflinger/parcel.h

View file

@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/small_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h" #include "core/core.h"
@ -38,19 +40,30 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, void nvdisp_disp0::Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers) {
u32 height, u32 stride, android::BufferTransformFlags transform, std::vector<Tegra::FramebufferConfig> output_layers;
const Common::Rectangle<int>& crop_rect, std::vector<Service::Nvidia::NvFence> output_fences;
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { output_layers.reserve(sorted_layers.size());
const DAddr addr = nvmap.GetHandleAddress(buffer_handle); output_fences.reserve(sorted_layers.size());
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, for (auto& layer : sorted_layers) {
stride, format, transform, crop_rect}; output_layers.emplace_back(Tegra::FramebufferConfig{
.address = nvmap.GetHandleAddress(layer.buffer_handle),
.offset = layer.offset,
.width = layer.width,
.height = layer.height,
.stride = layer.stride,
.pixel_format = layer.format,
.transform_flags = layer.transform,
.crop_rect = layer.crop_rect,
});
system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); for (size_t i = 0; i < layer.acquire_fence.num_fences; i++) {
output_fences.push_back(layer.acquire_fence.fences[i]);
}
}
system.GPU().RequestComposite(std::move(output_layers), std::move(output_fences));
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().EndSystemFrame(); system.GetPerfStats().EndSystemFrame();
system.GetPerfStats().BeginSystemFrame(); system.GetPerfStats().BeginSystemFrame();

View file

@ -8,8 +8,7 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/math_util.h" #include "common/math_util.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "core/hle/service/nvnflinger/buffer_transform_flags.h" #include "core/hle/service/nvnflinger/hwc_layer.h"
#include "core/hle/service/nvnflinger/pixel_format.h"
namespace Service::Nvidia::NvCore { namespace Service::Nvidia::NvCore {
class Container; class Container;
@ -35,11 +34,8 @@ public:
void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
void OnClose(DeviceFD fd) override; void OnClose(DeviceFD fd) override;
/// Performs a screen flip, drawing the buffer pointed to by the handle. /// Performs a screen flip, compositing each buffer.
void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, void Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers);
u32 stride, android::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect,
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences);
Kernel::KEvent* QueryEvent(u32 event_id) override; Kernel::KEvent* QueryEvent(u32 event_id) override;

View file

@ -0,0 +1,215 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <boost/container/small_vector.hpp>
#include "common/microprofile.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvnflinger/buffer_item.h"
#include "core/hle/service/nvnflinger/buffer_item_consumer.h"
#include "core/hle/service/nvnflinger/buffer_queue_producer.h"
#include "core/hle/service/nvnflinger/hardware_composer.h"
#include "core/hle/service/nvnflinger/hwc_layer.h"
#include "core/hle/service/nvnflinger/ui/graphic_buffer.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::Nvnflinger {
namespace {
/// Maps a guest-requested swap interval onto a real vsync interval.
///
/// Intervals in the range [1, 4] are returned unchanged. Anything outside that range is an
/// emulator extension that encodes a speed scale instead; in that case the scale is written
/// to out_speed_scale (when it is non-null) and an interval of 1 is returned.
///
/// @param out_speed_scale Optional destination for the decoded speed scale; may be nullptr.
/// @param swap_interval   Swap interval as requested by the guest.
/// @return The swap interval to use for scheduling.
s32 NormalizeSwapInterval(f32* out_speed_scale, s32 swap_interval) {
    const bool is_speed_multiplier = swap_interval <= 0;
    const bool is_precise_speed = swap_interval >= 5;

    // Ordinary intervals pass straight through and leave the speed scale untouched.
    if (!is_speed_multiplier && !is_precise_speed) {
        return swap_interval;
    }

    if (out_speed_scale != nullptr) {
        // Nonpositive intervals act as a speed multiplier, large ones as a percentage.
        *out_speed_scale = is_speed_multiplier ? 2.f * static_cast<f32>(1 - swap_interval)
                                               : static_cast<f32>(swap_interval) / 100.f;
    }

    // Both extensions present on every vsync.
    return 1;
}
} // namespace
HardwareComposer::HardwareComposer() = default;
HardwareComposer::~HardwareComposer() = default;
/// Runs one composition pass for a display.
///
/// Advances the frame counter, releases held buffers whose release frame number has been
/// reached, fetches a buffer for every layer of the display, and, if at least one layer
/// produced a newly acquired buffer, submits the whole layer stack to nvdisp.
///
/// @param out_speed_scale Receives the speed scale for this pass; reset to 1.0 before the
///                        layers are examined. Dereferenced unconditionally.
/// @param display         Display whose layers are composed.
/// @param nvdisp          Device that receives the sorted layer stack.
/// @param frame_advance   Amount added to the frame counter for this pass.
/// @return The smallest normalized swap interval among the composed layers, or 1 when no
///         layer contributed a buffer.
u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, VI::Display& display,
                                    Nvidia::Devices::nvdisp_disp0& nvdisp, u32 frame_advance) {
    boost::container::small_vector<HwcLayer, 2> pending_layers;

    m_frame_number += frame_advance;

    // Give back every buffer that is still held and whose release frame has been reached.
    for (auto& [slot_layer_id, slot] : m_framebuffers) {
        const bool release_due = slot.release_frame_number <= m_frame_number;
        if (!release_due || !slot.is_acquired) {
            continue;
        }

        auto* const owner = display.FindLayer(slot_layer_id);
        if (owner == nullptr) {
            // The layer is gone from the display; leave the slot as it is.
            continue;
        }

        // TODO: support release fence
        // This is needed to prevent screen tearing
        owner->GetConsumer().ReleaseBuffer(slot.item, android::Fence::NoFence());
        slot.is_acquired = false;
    }

    // Start from 100% speed; a layer's swap interval may override this below.
    *out_speed_scale = 1.0f;

    // Smallest swap interval seen so far, and whether anything new arrived this pass.
    std::optional<s32> swap_interval{};
    bool has_acquired_buffer{};

    for (size_t index = 0; index < display.GetNumLayers(); ++index) {
        auto& layer = display.GetLayer(index);
        const auto layer_id = layer.GetLayerId();

        // Fetch a buffer for this layer: either a newly acquired one or the cached one.
        const CacheStatus status = this->CacheFramebufferLocked(layer, layer_id);
        if (status == CacheStatus::NoBufferAvailable) {
            // Nothing to show for this layer.
            continue;
        }

        // A newly acquired buffer means the output changed and must be presented.
        has_acquired_buffer |= (status == CacheStatus::BufferAcquired);

        const auto& item = m_framebuffers[layer_id].item;
        const auto& graphic_buffer = *item.graphic_buffer;

        // TODO: get proper Z-index from layer
        pending_layers.push_back(HwcLayer{
            .buffer_handle = graphic_buffer.BufferId(),
            .offset = graphic_buffer.Offset(),
            .format = graphic_buffer.ExternalFormat(),
            .width = graphic_buffer.Width(),
            .height = graphic_buffer.Height(),
            .stride = graphic_buffer.Stride(),
            .z_index = 0,
            .transform = static_cast<android::BufferTransformFlags>(item.transform),
            .crop_rect = item.crop,
            .acquire_fence = item.fence,
        });

        // The next pass has to happen no later than the vsync on which the soonest frame
        // is due for release, so keep the minimum interval over all layers.
        const s32 layer_interval = NormalizeSwapInterval(out_speed_scale, item.swap_interval);

        // TODO: handle cases where swap intervals are relatively prime. So far,
        // only swap intervals of 0, 1 and 2 have been observed, but if 3 were
        // to be introduced, this would cause an issue.
        swap_interval = swap_interval ? std::min(*swap_interval, layer_interval) : layer_interval;
    }

    if (has_acquired_buffer) {
        // Order the stack by Z-index, keeping layer order stable for equal indices.
        std::stable_sort(
            pending_layers.begin(), pending_layers.end(),
            [](const HwcLayer& lhs, const HwcLayer& rhs) { return lhs.z_index < rhs.z_index; });

        nvdisp.Composite(pending_layers);
    }

    // Render MicroProfile.
    MicroProfileFlip();

    // Always advance by at least one frame.
    return swap_interval.value_or(1);
}
/// Drops the cached framebuffer slot that belongs to a layer.
///
/// If the slot still holds an acquired buffer and the layer can be found on the display,
/// the buffer is released to the layer's consumer first. Does nothing when no slot is
/// tracked for layer_id.
///
/// @param display  Display used to look up the layer.
/// @param layer_id Identifier of the layer being removed.
void HardwareComposer::RemoveLayerLocked(VI::Display& display, LayerId layer_id) {
    const auto entry = m_framebuffers.find(layer_id);
    if (entry == m_framebuffers.end()) {
        // Nothing is cached for this layer.
        return;
    }

    // Hand the buffer back only if the layer still exists and the buffer is still held.
    auto& slot = entry->second;
    if (auto* const layer = display.FindLayer(layer_id); layer != nullptr && slot.is_acquired) {
        layer->GetConsumer().ReleaseBuffer(slot.item, android::Fence::NoFence());
    }

    m_framebuffers.erase(entry);
}
/// Attempts to acquire the next buffer item from a layer's consumer into a slot.
///
/// On success the slot is marked as acquired and its release frame number is scheduled
/// relative to the current frame counter.
///
/// @param layer       Layer whose consumer is asked for a buffer.
/// @param framebuffer Slot that receives the buffer item and the release bookkeeping.
/// @return true if a buffer item was acquired, false if the consumer reported an error.
bool HardwareComposer::TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer) {
    // Attempt the update.
    const auto status = layer.GetConsumer().AcquireBuffer(&framebuffer.item, {}, false);
    if (status != android::Status::NoError) {
        return false;
    }

    // ComposeLocked compares release_frame_number against the running m_frame_number, so it
    // has to be an absolute frame number. Storing only the interval would make every buffer
    // look overdue as soon as the counter exceeds it, ignoring the layer's own interval.
    const s32 swap_interval = NormalizeSwapInterval(nullptr, framebuffer.item.swap_interval);
    framebuffer.release_frame_number =
        m_frame_number + static_cast<ReleaseFrameNumber>(swap_interval);
    framebuffer.is_acquired = true;

    return true;
}
/// Makes sure a framebuffer slot is available for a layer, acquiring a buffer if needed.
///
/// @param layer    Layer to acquire from.
/// @param layer_id Key of the layer's slot in the framebuffer cache.
/// @return BufferAcquired when a new buffer item was acquired, CachedBufferReused when an
///         existing slot is used as-is, and NoBufferAvailable when there is no slot and
///         none could be created.
HardwareComposer::CacheStatus HardwareComposer::CacheFramebufferLocked(VI::Layer& layer,
                                                                       LayerId layer_id) {
    if (const auto entry = m_framebuffers.find(layer_id); entry != m_framebuffers.end()) {
        auto& cached = entry->second;

        // A slot whose buffer was already released gets one chance to pick up a new item.
        if (!cached.is_acquired && this->TryAcquireFramebufferLocked(layer, cached)) {
            return CacheStatus::BufferAcquired;
        }

        // Either the buffer is still held or nothing new arrived: keep using the slot.
        return CacheStatus::CachedBufferReused;
    }

    // No slot yet; only create one if a buffer can actually be acquired.
    Framebuffer fresh{};
    if (!this->TryAcquireFramebufferLocked(layer, fresh)) {
        return CacheStatus::NoBufferAvailable;
    }

    m_framebuffers.emplace(layer_id, std::move(fresh));
    return CacheStatus::BufferAcquired;
}
} // namespace Service::Nvnflinger

View file

@ -0,0 +1,59 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <memory>
#include <boost/container/flat_map.hpp>
#include "core/hle/service/nvnflinger/buffer_item.h"
namespace Service::Nvidia::Devices {
class nvdisp_disp0;
}
namespace Service::VI {
class Display;
class Layer;
} // namespace Service::VI
namespace Service::Nvnflinger {
using LayerId = u64;
/// Tracks the buffer item currently held for each layer of a display and submits the
/// resulting layer stack to the display device.
/// NOTE(review): the "Locked" suffix presumably means the caller must already hold the
/// relevant lock -- confirm against the callers.
class HardwareComposer {
public:
explicit HardwareComposer();
~HardwareComposer();
/// Advances the frame counter by frame_advance, releases buffers that are due, fetches a
/// buffer for every layer of the display and, if any layer acquired a new buffer, passes
/// the layer stack to nvdisp. Writes the speed scale to out_speed_scale and returns the
/// swap interval to use for the next pass (1 when no layer contributed a buffer).
u32 ComposeLocked(f32* out_speed_scale, VI::Display& display,
Nvidia::Devices::nvdisp_disp0& nvdisp, u32 frame_advance);
/// Releases the buffer still held for layer_id (if any) and forgets its slot.
void RemoveLayerLocked(VI::Display& display, LayerId layer_id);
private:
// TODO: do we want to track frame number in vi instead?
// Running frame counter; incremented by frame_advance on every ComposeLocked call.
u64 m_frame_number{0};
private:
using ReleaseFrameNumber = u64;
/// Per-layer cache slot.
struct Framebuffer {
// Buffer item most recently acquired from the layer's consumer.
android::BufferItem item{};
// Compared against m_frame_number to decide when the item is released.
ReleaseFrameNumber release_frame_number{};
// True while the item is held and has not been released back to the consumer.
bool is_acquired{false};
};
/// Outcome of CacheFramebufferLocked.
enum class CacheStatus : u32 {
// No slot exists for the layer and no buffer could be acquired.
NoBufferAvailable,
// A new buffer item was acquired for the layer.
BufferAcquired,
// An existing slot is used without acquiring a new item.
CachedBufferReused,
};
// Cache slots keyed by layer id.
boost::container::flat_map<LayerId, Framebuffer> m_framebuffers{};
private:
/// Tries to acquire a buffer item from the layer into framebuffer; returns true on success.
bool TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer);
/// Finds or creates the slot for layer_id, acquiring a new buffer item when possible.
CacheStatus CacheFramebufferLocked(VI::Layer& layer, LayerId layer_id);
};
} // namespace Service::Nvnflinger

View file

@ -0,0 +1,27 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "common/math_util.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvnflinger/buffer_transform_flags.h"
#include "core/hle/service/nvnflinger/pixel_format.h"
#include "core/hle/service/nvnflinger/ui/fence.h"
namespace Service::Nvnflinger {
/// Description of one layer handed from the hardware composer to the display device.
/// The fields are filled with designated initializers, so their declaration order matters.
struct HwcLayer {
// Buffer id of the layer's graphic buffer; the display device resolves it to an address
// through nvmap.
u32 buffer_handle;
// Offset reported by the graphic buffer.
u32 offset;
// External pixel format of the graphic buffer.
android::PixelFormat format;
// Width, height and stride reported by the graphic buffer.
// NOTE(review): units (pixels vs. bytes) are not visible here -- confirm.
u32 width;
u32 height;
u32 stride;
// Sort key for composition order; layers are sorted by ascending z_index.
s32 z_index;
// Transform flags taken from the buffer item.
android::BufferTransformFlags transform;
// Crop rectangle taken from the buffer item.
Common::Rectangle<int> crop_rect;
// Fence taken from the buffer item; its individual fences are forwarded to the GPU
// together with the composite request.
android::Fence acquire_fence;
};
} // namespace Service::Nvnflinger

View file

@ -18,6 +18,7 @@
#include "core/hle/service/nvnflinger/buffer_item_consumer.h" #include "core/hle/service/nvnflinger/buffer_item_consumer.h"
#include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h"
#include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" #include "core/hle/service/nvnflinger/fb_share_buffer_manager.h"
#include "core/hle/service/nvnflinger/hardware_composer.h"
#include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h"
#include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/nvnflinger/nvnflinger.h"
#include "core/hle/service/nvnflinger/ui/graphic_buffer.h" #include "core/hle/service/nvnflinger/ui/graphic_buffer.h"
@ -279,45 +280,19 @@ void Nvnflinger::Compose() {
SCOPE_EXIT({ display.SignalVSyncEvent(); }); SCOPE_EXIT({ display.SignalVSyncEvent(); });
// Don't do anything for displays without layers. // Don't do anything for displays without layers.
if (!display.HasLayers()) if (!display.HasLayers()) {
continue;
// TODO(Subv): Support more than 1 layer.
VI::Layer& layer = display.GetLayer(0);
android::BufferItem buffer{};
const auto status = layer.GetConsumer().AcquireBuffer(&buffer, {}, false);
if (status != android::Status::NoError) {
continue; continue;
} }
const auto& igbp_buffer = *buffer.graphic_buffer;
if (!system.IsPoweredOn()) { if (!system.IsPoweredOn()) {
return; // We are likely shutting down return; // We are likely shutting down
} }
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc)
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd); auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd);
ASSERT(nvdisp); ASSERT(nvdisp);
Common::Rectangle<int> crop_rect{ swap_interval = display.GetComposer().ComposeLocked(&compose_speed_scale, display, *nvdisp,
static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()), swap_interval);
static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())};
nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(),
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect,
buffer.fence.fences, buffer.fence.num_fences);
MicroProfileFlip();
swap_interval = buffer.swap_interval;
layer.GetConsumer().ReleaseBuffer(buffer, android::Fence::NoFence());
} }
} }
@ -334,15 +309,16 @@ s64 Nvnflinger::GetNextTicks() const {
speed_scale = 0.01f; speed_scale = 0.01f;
} }
} }
// Adjust by speed limit determined during composition.
speed_scale /= compose_speed_scale;
if (system.GetNVDECActive() && settings.use_video_framerate.GetValue()) { if (system.GetNVDECActive() && settings.use_video_framerate.GetValue()) {
// Run at intended presentation rate during video playback. // Run at intended presentation rate during video playback.
speed_scale = 1.f; speed_scale = 1.f;
} }
// As an extension, treat nonpositive swap interval as framerate multiplier. const f32 effective_fps = 60.f / static_cast<f32>(swap_interval);
const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval)
: 60.f / static_cast<f32>(swap_interval);
return static_cast<s64>(speed_scale * (1000000000.f / effective_fps)); return static_cast<s64>(speed_scale * (1000000000.f / effective_fps));
} }

View file

@ -46,6 +46,7 @@ class BufferQueueProducer;
namespace Service::Nvnflinger { namespace Service::Nvnflinger {
class FbShareBufferManager; class FbShareBufferManager;
class HardwareComposer;
class HosBinderDriverServer; class HosBinderDriverServer;
class Nvnflinger final { class Nvnflinger final {
@ -143,6 +144,7 @@ private:
u32 next_buffer_queue_id = 1; u32 next_buffer_queue_id = 1;
s32 swap_interval = 1; s32 swap_interval = 1;
f32 compose_speed_scale = 1.0f;
bool is_abandoned = false; bool is_abandoned = false;

View file

@ -16,6 +16,7 @@
#include "core/hle/service/nvnflinger/buffer_queue_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_consumer.h"
#include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h"
#include "core/hle/service/nvnflinger/buffer_queue_producer.h" #include "core/hle/service/nvnflinger/buffer_queue_producer.h"
#include "core/hle/service/nvnflinger/hardware_composer.h"
#include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h"
#include "core/hle/service/vi/display/vi_display.h" #include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h" #include "core/hle/service/vi/layer/vi_layer.h"
@ -43,6 +44,7 @@ Display::Display(u64 id, std::string name_,
KernelHelpers::ServiceContext& service_context_, Core::System& system_) KernelHelpers::ServiceContext& service_context_, Core::System& system_)
: display_id{id}, name{std::move(name_)}, hos_binder_driver_server{hos_binder_driver_server_}, : display_id{id}, name{std::move(name_)}, hos_binder_driver_server{hos_binder_driver_server_},
service_context{service_context_} { service_context{service_context_} {
hardware_composer = std::make_unique<Nvnflinger::HardwareComposer>();
vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id)); vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id));
} }
@ -81,8 +83,6 @@ void Display::SignalVSyncEvent() {
void Display::CreateLayer(u64 layer_id, u32 binder_id, void Display::CreateLayer(u64 layer_id, u32 binder_id,
Service::Nvidia::NvCore::Container& nv_core) { Service::Nvidia::NvCore::Container& nv_core) {
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
auto [core, producer, consumer] = CreateBufferQueue(service_context, nv_core.GetNvMapFile()); auto [core, producer, consumer] = CreateBufferQueue(service_context, nv_core.GetNvMapFile());
auto buffer_item_consumer = std::make_shared<android::BufferItemConsumer>(std::move(consumer)); auto buffer_item_consumer = std::make_shared<android::BufferItemConsumer>(std::move(consumer));

View file

@ -11,9 +11,14 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "core/hle/result.h" #include "core/hle/result.h"
namespace Core {
class System;
}
namespace Kernel { namespace Kernel {
class KEvent; class KEvent;
} class KReadableEvent;
} // namespace Kernel
namespace Service::android { namespace Service::android {
class BufferQueueProducer; class BufferQueueProducer;
@ -24,8 +29,9 @@ class ServiceContext;
} }
namespace Service::Nvnflinger { namespace Service::Nvnflinger {
class HardwareComposer;
class HosBinderDriverServer; class HosBinderDriverServer;
} } // namespace Service::Nvnflinger
namespace Service::Nvidia::NvCore { namespace Service::Nvidia::NvCore {
class Container; class Container;
@ -118,6 +124,10 @@ public:
/// ///
const Layer* FindLayer(u64 layer_id) const; const Layer* FindLayer(u64 layer_id) const;
Nvnflinger::HardwareComposer& GetComposer() const {
return *hardware_composer;
}
private: private:
u64 display_id; u64 display_id;
std::string name; std::string name;
@ -125,6 +135,7 @@ private:
KernelHelpers::ServiceContext& service_context; KernelHelpers::ServiceContext& service_context;
std::vector<std::unique_ptr<Layer>> layers; std::vector<std::unique_ptr<Layer>> layers;
std::unique_ptr<Nvnflinger::HardwareComposer> hardware_composer;
Kernel::KEvent* vsync_event{}; Kernel::KEvent* vsync_event{};
bool is_abandoned{}; bool is_abandoned{};
}; };

View file

@ -195,8 +195,9 @@ private:
void GetSharedBufferMemoryHandleId(HLERequestContext& ctx) { void GetSharedBufferMemoryHandleId(HLERequestContext& ctx) {
IPC::RequestParser rp{ctx}; IPC::RequestParser rp{ctx};
const u64 buffer_id = rp.PopRaw<u64>(); const u64 buffer_id = rp.PopRaw<u64>();
const u64 aruid = ctx.GetPID();
LOG_INFO(Service_VI, "called. buffer_id={:#x}", buffer_id); LOG_INFO(Service_VI, "called. buffer_id={:#x}, aruid={:#x}", buffer_id, aruid);
struct OutputParameters { struct OutputParameters {
s32 nvmap_handle; s32 nvmap_handle;
@ -206,7 +207,7 @@ private:
OutputParameters out{}; OutputParameters out{};
Nvnflinger::SharedMemoryPoolLayout layout{}; Nvnflinger::SharedMemoryPoolLayout layout{};
const auto result = nvnflinger.GetSystemBufferManager().GetSharedBufferMemoryHandleId( const auto result = nvnflinger.GetSystemBufferManager().GetSharedBufferMemoryHandleId(
&out.size, &out.nvmap_handle, &layout, buffer_id, 0); &out.size, &out.nvmap_handle, &layout, buffer_id, aruid);
ctx.WriteBuffer(&layout, sizeof(layout)); ctx.WriteBuffer(&layout, sizeof(layout));

View file

@ -55,6 +55,7 @@ add_library(video_core STATIC
engines/maxwell_dma.h engines/maxwell_dma.h
engines/puller.cpp engines/puller.cpp
engines/puller.h engines/puller.h
framebuffer_config.cpp
framebuffer_config.h framebuffer_config.h
fsr.cpp fsr.cpp
fsr.h fsr.h
@ -115,8 +116,24 @@ add_library(video_core STATIC
renderer_null/null_rasterizer.h renderer_null/null_rasterizer.h
renderer_null/renderer_null.cpp renderer_null/renderer_null.cpp
renderer_null/renderer_null.h renderer_null/renderer_null.h
renderer_opengl/present/filters.cpp
renderer_opengl/present/filters.h
renderer_opengl/present/fsr.cpp
renderer_opengl/present/fsr.h
renderer_opengl/present/fxaa.cpp
renderer_opengl/present/fxaa.h
renderer_opengl/present/layer.cpp
renderer_opengl/present/layer.h
renderer_opengl/present/present_uniforms.h
renderer_opengl/present/smaa.cpp
renderer_opengl/present/smaa.h
renderer_opengl/present/util.h
renderer_opengl/present/window_adapt_pass.cpp
renderer_opengl/present/window_adapt_pass.h
renderer_opengl/blit_image.cpp renderer_opengl/blit_image.cpp
renderer_opengl/blit_image.h renderer_opengl/blit_image.h
renderer_opengl/gl_blit_screen.cpp
renderer_opengl/gl_blit_screen.h
renderer_opengl/gl_buffer_cache_base.cpp renderer_opengl/gl_buffer_cache_base.cpp
renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_buffer_cache.h
@ -126,8 +143,6 @@ add_library(video_core STATIC
renderer_opengl/gl_device.h renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h renderer_opengl/gl_fence_manager.h
renderer_opengl/gl_fsr.cpp
renderer_opengl/gl_fsr.h
renderer_opengl/gl_graphics_pipeline.cpp renderer_opengl/gl_graphics_pipeline.cpp
renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_graphics_pipeline.h
renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.cpp
@ -155,6 +170,22 @@ add_library(video_core STATIC
renderer_opengl/renderer_opengl.h renderer_opengl/renderer_opengl.h
renderer_opengl/util_shaders.cpp renderer_opengl/util_shaders.cpp
renderer_opengl/util_shaders.h renderer_opengl/util_shaders.h
renderer_vulkan/present/anti_alias_pass.h
renderer_vulkan/present/filters.cpp
renderer_vulkan/present/filters.h
renderer_vulkan/present/fsr.cpp
renderer_vulkan/present/fsr.h
renderer_vulkan/present/fxaa.cpp
renderer_vulkan/present/fxaa.h
renderer_vulkan/present/layer.cpp
renderer_vulkan/present/layer.h
renderer_vulkan/present/present_push_constants.h
renderer_vulkan/present/smaa.cpp
renderer_vulkan/present/smaa.h
renderer_vulkan/present/util.cpp
renderer_vulkan/present/util.h
renderer_vulkan/present/window_adapt_pass.cpp
renderer_vulkan/present/window_adapt_pass.h
renderer_vulkan/blit_image.cpp renderer_vulkan/blit_image.cpp
renderer_vulkan/blit_image.h renderer_vulkan/blit_image.h
renderer_vulkan/fixed_pipeline_state.cpp renderer_vulkan/fixed_pipeline_state.cpp
@ -181,8 +212,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.cpp
renderer_vulkan/vk_fence_manager.h renderer_vulkan/vk_fence_manager.h
renderer_vulkan/vk_fsr.cpp
renderer_vulkan/vk_fsr.h
renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.cpp
@ -203,8 +232,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_scheduler.h renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_smaa.cpp
renderer_vulkan/vk_smaa.h
renderer_vulkan/vk_staging_buffer_pool.cpp renderer_vulkan/vk_staging_buffer_pool.cpp
renderer_vulkan/vk_staging_buffer_pool.h renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_state_tracker.cpp renderer_vulkan/vk_state_tracker.cpp

View file

@ -8,6 +8,7 @@
#include <vector> #include <vector>
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
#include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_interface.h"

View file

@ -0,0 +1,55 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "video_core/framebuffer_config.h"
namespace Tegra {
/// Computes the crop rectangle of a framebuffer in normalized texture coordinates.
///
/// Uses the framebuffer's crop rectangle when it is not empty, otherwise the full
/// framebuffer extent. Horizontal and vertical flip flags are applied by swapping the
/// corresponding edges; any other transform flag is reported as unimplemented.
///
/// @param framebuffer    Framebuffer configuration providing crop, size and transform flags.
/// @param texture_width  Width the horizontal coordinates are divided by.
/// @param texture_height Height the vertical coordinates are divided by.
/// @return Rectangle with each edge divided by the matching texture dimension.
Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width,
                                     u32 texture_height) {
    using Service::android::BufferTransformFlags;

    // Pick the source rectangle: the explicit crop if there is one, else the whole buffer.
    const auto& crop = framebuffer.crop_rect;
    const bool has_crop = !crop.IsEmpty();

    f32 left = has_crop ? static_cast<f32>(crop.left) : 0.0f;
    f32 top = has_crop ? static_cast<f32>(crop.top) : 0.0f;
    f32 right = has_crop ? static_cast<f32>(crop.right) : static_cast<f32>(framebuffer.width);
    f32 bottom = has_crop ? static_cast<f32>(crop.bottom) : static_cast<f32>(framebuffer.height);

    // Flips are expressed by exchanging the opposing edges.
    const auto flags = framebuffer.transform_flags;
    if (True(flags & BufferTransformFlags::FlipH)) {
        std::swap(left, right);
    }
    if (True(flags & BufferTransformFlags::FlipV)) {
        std::swap(top, bottom);
    }

    // Whatever remains after removing the flip bits is not handled here.
    const auto remaining_flags =
        flags & ~BufferTransformFlags::FlipH & ~BufferTransformFlags::FlipV;
    if (True(remaining_flags)) {
        UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
                          static_cast<u32>(remaining_flags));
    }

    // Bring the edges into normalized coordinate space.
    const f32 width = static_cast<f32>(texture_width);
    const f32 height = static_cast<f32>(texture_height);
    return Common::Rectangle<f32>(left / width, top / height, right / width, bottom / height);
}
} // namespace Tegra

View file

@ -7,6 +7,7 @@
#include "common/math_util.h" #include "common/math_util.h"
#include "core/hle/service/nvnflinger/buffer_transform_flags.h" #include "core/hle/service/nvnflinger/buffer_transform_flags.h"
#include "core/hle/service/nvnflinger/pixel_format.h" #include "core/hle/service/nvnflinger/pixel_format.h"
#include "core/hle/service/nvnflinger/ui/fence.h"
namespace Tegra { namespace Tegra {
@ -21,7 +22,10 @@ struct FramebufferConfig {
u32 stride{}; u32 stride{};
Service::android::PixelFormat pixel_format{}; Service::android::PixelFormat pixel_format{};
Service::android::BufferTransformFlags transform_flags{}; Service::android::BufferTransformFlags transform_flags{};
Common::Rectangle<int> crop_rect; Common::Rectangle<int> crop_rect{};
}; };
Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width,
u32 texture_height);
} // namespace Tegra } // namespace Tegra

View file

@ -274,11 +274,6 @@ struct GPU::Impl {
} }
} }
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(DAddr addr, u64 size) { void FlushRegion(DAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size); gpu_thread.FlushRegion(addr, size);
@ -313,8 +308,9 @@ struct GPU::Impl {
gpu_thread.FlushAndInvalidateRegion(addr, size); gpu_thread.FlushAndInvalidateRegion(addr, size);
} }
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { std::vector<Service::Nvidia::NvFence>&& fences) {
size_t num_fences{fences.size()};
size_t current_request_counter{}; size_t current_request_counter{};
{ {
std::unique_lock<std::mutex> lk(request_swap_mutex); std::unique_lock<std::mutex> lk(request_swap_mutex);
@ -328,13 +324,12 @@ struct GPU::Impl {
} }
} }
const auto wait_fence = const auto wait_fence =
RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { RequestSyncOperation([this, current_request_counter, &layers, &fences, num_fences] {
auto& syncpoint_manager = host1x.GetSyncpointManager(); auto& syncpoint_manager = host1x.GetSyncpointManager();
if (num_fences == 0) { if (num_fences == 0) {
renderer->SwapBuffers(framebuffer); renderer->Composite(layers);
} }
const auto executer = [this, current_request_counter, const auto executer = [this, current_request_counter, layers_copy = layers]() {
framebuffer_copy = *framebuffer]() {
{ {
std::unique_lock<std::mutex> lk(request_swap_mutex); std::unique_lock<std::mutex> lk(request_swap_mutex);
if (--request_swap_counters[current_request_counter] != 0) { if (--request_swap_counters[current_request_counter] != 0) {
@ -342,7 +337,7 @@ struct GPU::Impl {
} }
free_swap_counters.push_back(current_request_counter); free_swap_counters.push_back(current_request_counter);
} }
renderer->SwapBuffers(&framebuffer_copy); renderer->Composite(layers_copy);
}; };
for (size_t i = 0; i < num_fences; i++) { for (size_t i = 0; i < num_fences; i++) {
syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
@ -505,9 +500,9 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
return impl->ShaderNotify(); return impl->ShaderNotify();
} }
void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { std::vector<Service::Nvidia::NvFence>&& fences) {
impl->RequestSwapBuffers(framebuffer, fences, num_fences); impl->RequestComposite(std::move(layers), std::move(fences));
} }
u64 GPU::GetTicks() const { u64 GPU::GetTicks() const {
@ -554,10 +549,6 @@ void GPU::ClearCdmaInstance(u32 id) {
impl->ClearCdmaInstance(id); impl->ClearCdmaInstance(id);
} }
/// Forwards the swap request unchanged to the implementation object; `framebuffer` is passed
/// through as-is, including when it is null.
void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
impl->SwapBuffers(framebuffer);
}
VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) {
return impl->OnCPURead(addr, size); return impl->OnCPURead(addr, size);
} }

View file

@ -212,8 +212,8 @@ public:
void RendererFrameEndNotify(); void RendererFrameEndNotify();
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences); std::vector<Service::Nvidia::NvFence>&& fences);
/// Performs any additional setup necessary in order to begin GPU emulation. /// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary /// This can be used to launch any necessary threads and register any necessary

View file

@ -40,8 +40,6 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
} }
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
scheduler.Push(submit_list->channel, std::move(submit_list->entries)); scheduler.Push(submit_list->channel, std::move(submit_list->entries));
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<GPUTickCommand>(next.data)) { } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork(); system.GPU().TickWork();
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
@ -78,10 +76,6 @@ void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(channel, std::move(entries))); PushCommand(SubmitListCommand(channel, std::move(entries)));
} }
/// Queues a swap-buffers command for the GPU thread.
/// The pointed-to config is copied into the command; a null pointer is queued as an empty
/// optional.
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    if (framebuffer != nullptr) {
        PushCommand(SwapBuffersCommand(std::make_optional(*framebuffer)));
    } else {
        PushCommand(SwapBuffersCommand(std::nullopt));
    }
}
void ThreadManager::FlushRegion(DAddr addr, u64 size) { void ThreadManager::FlushRegion(DAddr addr, u64 size) {
if (!is_async) { if (!is_async) {
// Always flush with synchronous GPU mode // Always flush with synchronous GPU mode

View file

@ -44,14 +44,6 @@ struct SubmitListCommand final {
Tegra::CommandList entries; Tegra::CommandList entries;
}; };
/// Command to signal to the GPU thread that a swap buffers is pending
/// Holds its own copy of the framebuffer configuration; an empty optional means no
/// framebuffer was supplied with the request.
struct SwapBuffersCommand final {
// NOTE(review): the parameter is optional<const T>, so the std::move below cannot move the
// contained value and falls back to a copy.
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer_)
: framebuffer{std::move(framebuffer_)} {}
std::optional<Tegra::FramebufferConfig> framebuffer;
};
/// Command to signal to the GPU thread to flush a region /// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final { struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
@ -81,8 +73,8 @@ struct FlushAndInvalidateRegionCommand final {
struct GPUTickCommand final {}; struct GPUTickCommand final {};
using CommandData = using CommandData =
std::variant<std::monostate, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, std::variant<std::monostate, SubmitListCommand, FlushRegionCommand, InvalidateRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand, GPUTickCommand>; FlushAndInvalidateRegionCommand, GPUTickCommand>;
struct CommandDataContainer { struct CommandDataContainer {
CommandDataContainer() = default; CommandDataContainer() = default;
@ -118,9 +110,6 @@ public:
/// Push GPU command entries to be processed /// Push GPU command entries to be processed
void SubmitList(s32 channel, Tegra::CommandList&& entries); void SubmitList(s32 channel, Tegra::CommandList&& entries);
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(DAddr addr, u64 size); void FlushRegion(DAddr addr, u64 size);

View file

@ -9,7 +9,7 @@ set(FIDELITYFX_FILES
) )
set(GLSL_INCLUDES set(GLSL_INCLUDES
fidelityfx_fsr.comp fidelityfx_fsr.frag
${FIDELITYFX_FILES} ${FIDELITYFX_FILES}
) )
@ -56,10 +56,11 @@ set(SHADER_FILES
vulkan_color_clear.frag vulkan_color_clear.frag
vulkan_color_clear.vert vulkan_color_clear.vert
vulkan_depthstencil_clear.frag vulkan_depthstencil_clear.frag
vulkan_fidelityfx_fsr_easu_fp16.comp vulkan_fidelityfx_fsr.vert
vulkan_fidelityfx_fsr_easu_fp32.comp vulkan_fidelityfx_fsr_easu_fp16.frag
vulkan_fidelityfx_fsr_rcas_fp16.comp vulkan_fidelityfx_fsr_easu_fp32.frag
vulkan_fidelityfx_fsr_rcas_fp32.comp vulkan_fidelityfx_fsr_rcas_fp16.frag
vulkan_fidelityfx_fsr_rcas_fp32.frag
vulkan_present.frag vulkan_present.frag
vulkan_present.vert vulkan_present.vert
vulkan_present_scaleforce_fp16.frag vulkan_present_scaleforce_fp16.frag

View file

@ -34,7 +34,6 @@ layout( push_constant ) uniform constants {
}; };
layout(set=0,binding=0) uniform sampler2D InputTexture; layout(set=0,binding=0) uniform sampler2D InputTexture;
layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
#define A_GPU 1 #define A_GPU 1
#define A_GLSL 1 #define A_GLSL 1
@ -72,44 +71,40 @@ layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
#include "ffx_fsr1.h" #include "ffx_fsr1.h"
void CurrFilter(AU2 pos) { #if USE_RCAS
#if USE_BILINEAR layout(location = 0) in vec2 frag_texcoord;
AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
#endif #endif
layout (location = 0) out vec4 frag_color;
void CurrFilter(AU2 pos) {
#if USE_EASU #if USE_EASU
#ifndef YUZU_USE_FP16 #ifndef YUZU_USE_FP16
AF3 c; AF3 c;
FsrEasuF(c, pos, Const0, Const1, Const2, Const3); FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); frag_color = AF4(c, 1.0);
#else #else
AH3 c; AH3 c;
FsrEasuH(c, pos, Const0, Const1, Const2, Const3); FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); frag_color = AH4(c, 1.0);
#endif #endif
#endif #endif
#if USE_RCAS #if USE_RCAS
#ifndef YUZU_USE_FP16 #ifndef YUZU_USE_FP16
AF3 c; AF3 c;
FsrRcasF(c.r, c.g, c.b, pos, Const0); FsrRcasF(c.r, c.g, c.b, pos, Const0);
imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); frag_color = AF4(c, 1.0);
#else #else
AH3 c; AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, Const0); FsrRcasH(c.r, c.g, c.b, pos, Const0);
imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); frag_color = AH4(c, 1.0);
#endif #endif
#endif #endif
} }
layout(local_size_x=64) in;
void main() { void main() {
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern. #if USE_RCAS
AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); CurrFilter(AU2(frag_texcoord * vec2(textureSize(InputTexture, 0))));
CurrFilter(gxy); #else
gxy.x += 8u; CurrFilter(AU2(gl_FragCoord.xy));
CurrFilter(gxy); #endif
gxy.y += 8u;
CurrFilter(gxy);
gxy.x -= 8u;
CurrFilter(gxy);
} }

View file

@ -7,8 +7,8 @@ out gl_PerVertex {
vec4 gl_Position; vec4 gl_Position;
}; };
const vec2 vertices[4] = const vec2 vertices[3] =
vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); vec2[3](vec2(-1,-1), vec2(3,-1), vec2(-1, 3));
layout (location = 0) out vec4 posPos; layout (location = 0) out vec4 posPos;

View file

@ -26,21 +26,11 @@
#endif #endif
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#endif
layout (location = 0) in vec2 tex_coord; layout (location = 0) in vec2 tex_coord;
layout (location = 0) out vec4 frag_color; layout (location = 0) out vec4 frag_color;
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; layout (binding = 0) uniform sampler2D input_texture;
const bool ignore_alpha = true; const bool ignore_alpha = true;

View file

@ -3,22 +3,12 @@
#version 460 core #version 460 core
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#endif
layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color; layout (location = 0) out vec4 color;
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; layout (binding = 0) uniform sampler2D color_texture;
vec4 cubic(float v) { vec4 cubic(float v) {
vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v;

View file

@ -7,21 +7,11 @@
#version 460 core #version 460 core
#ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BINDING_COLOR_TEXTURE 0
#endif
layout(location = 0) in vec2 frag_tex_coord; layout(location = 0) in vec2 frag_tex_coord;
layout(location = 0) out vec4 color; layout(location = 0) out vec4 color;
layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; layout(binding = 0) uniform sampler2D color_texture;
const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308);
const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703);

View file

@ -0,0 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
layout(location = 0) out vec2 texcoord;
// Generates one oversized triangle from gl_VertexIndex alone (no vertex inputs).
// For indices 0, 1, 2 the positions are (-1,-1), (3,-1), (-1,3) and the texture
// coordinates are (0,0), (2,0), (0,2).
void main() {
// Bit 0 of the index selects x = 0 or 4; bit 1 selects y = 0 or 4.
float x = float((gl_VertexIndex & 1) << 2);
float y = float((gl_VertexIndex & 2) << 1);
gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
// Halving maps the 0..4 range onto texture coordinates 0..2.
texcoord = vec2(x, y) / 2.0;
}

View file

@ -7,4 +7,4 @@
#define YUZU_USE_FP16 #define YUZU_USE_FP16
#define USE_EASU 1 #define USE_EASU 1
#include "fidelityfx_fsr.comp" #include "fidelityfx_fsr.frag"

View file

@ -6,4 +6,4 @@
#define USE_EASU 1 #define USE_EASU 1
#include "fidelityfx_fsr.comp" #include "fidelityfx_fsr.frag"

View file

@ -7,4 +7,4 @@
#define YUZU_USE_FP16 #define YUZU_USE_FP16
#define USE_RCAS 1 #define USE_RCAS 1
#include "fidelityfx_fsr.comp" #include "fidelityfx_fsr.frag"

View file

@ -6,4 +6,4 @@
#define USE_RCAS 1 #define USE_RCAS 1
#include "fidelityfx_fsr.comp" #include "fidelityfx_fsr.frag"

View file

@ -7,7 +7,7 @@ layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color; layout (location = 0) out vec4 color;
layout (binding = 1) uniform sampler2D color_texture; layout (binding = 0) uniform sampler2D color_texture;
void main() { void main() {
color = texture(color_texture, frag_tex_coord); color = texture(color_texture, frag_tex_coord);

View file

@ -3,16 +3,37 @@
#version 460 core #version 460 core
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord; layout (location = 0) out vec2 frag_tex_coord;
layout (set = 0, binding = 0) uniform MatrixBlock { struct ScreenRectVertex {
mat4 modelview_matrix; vec2 position;
vec2 tex_coord;
}; };
void main() { layout (push_constant) uniform PushConstants {
gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); mat4 modelview_matrix;
frag_tex_coord = vert_tex_coord; ScreenRectVertex vertices[4];
};
// Vulkan spec 15.8.1:
// Any member of a push constant block that is declared as an
// array must only be accessed with dynamically uniform indices.
//
// Returns the push-constant vertex for `index`. The switch makes every access to
// `vertices` use a literal index instead of the per-vertex value. Any index outside
// 1..3 (including 0) yields vertices[0].
ScreenRectVertex GetVertex(int index) {
switch (index) {
case 0:
default:
return vertices[0];
case 1:
return vertices[1];
case 2:
return vertices[2];
case 3:
return vertices[3];
}
}
void main() {
ScreenRectVertex vertex = GetVertex(gl_VertexIndex);
gl_Position = modelview_matrix * vec4(vertex.position, 0.0, 1.0);
frag_tex_coord = vertex.tex_coord;
} }

View file

@ -5,6 +5,7 @@
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#define VERSION 1
#define YUZU_USE_FP16 #define YUZU_USE_FP16
#include "opengl_present_scaleforce.frag" #include "opengl_present_scaleforce.frag"

View file

@ -5,4 +5,6 @@
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#define VERSION 1
#include "opengl_present_scaleforce.frag" #include "opengl_present_scaleforce.frag"

View file

@ -155,12 +155,6 @@ public:
virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) = 0; std::span<const u8> memory) = 0;
/// Attempt to use a faster method to display the framebuffer to screen
/// The default implementation ignores its arguments and returns false.
/// NOTE(review): false presumably means "no accelerated path taken" - confirm with callers.
[[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
DAddr framebuffer_addr, u32 pixel_stride) {
return false;
}
/// Initialize disk cached resources for the game being emulated /// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const DiskResourceLoadCallback& callback) {} const DiskResourceLoadCallback& callback) {}

View file

@ -38,7 +38,7 @@ public:
virtual ~RendererBase(); virtual ~RendererBase();
/// Finalize rendering the guest frame and draw into the presentation texture /// Finalize rendering the guest frame and draw into the presentation texture
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; virtual void Composite(std::span<const Tegra::FramebufferConfig> layers) = 0;
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;

View file

@ -92,10 +92,6 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac
} }
void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) {} std::span<const u8> memory) {}
// Null backend: ignores all arguments and unconditionally returns true.
bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config,
DAddr framebuffer_addr, u32 pixel_stride) {
return true;
}
void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {} const VideoCore::DiskResourceLoadCallback& callback) {}
void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) { void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {

View file

@ -77,8 +77,6 @@ public:
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) override; std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading, void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override; const VideoCore::DiskResourceLoadCallback& callback) override;
void InitializeChannel(Tegra::Control::ChannelState& channel) override; void InitializeChannel(Tegra::Control::ChannelState& channel) override;

View file

@ -13,8 +13,8 @@ RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gp
RendererNull::~RendererNull() = default; RendererNull::~RendererNull() = default;
void RendererNull::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { void RendererNull::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) {
if (!framebuffer) { if (framebuffers.empty()) {
return; return;
} }

View file

@ -17,7 +17,7 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context); std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererNull() override; ~RendererNull() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void Composite(std::span<const Tegra::FramebufferConfig> framebuffer) override;
VideoCore::RasterizerInterface* ReadRasterizer() override { VideoCore::RasterizerInterface* ReadRasterizer() override {
return &m_rasterizer; return &m_rasterizer;

View file

@ -0,0 +1,96 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "video_core/renderer_opengl/gl_blit_screen.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/present/filters.h"
#include "video_core/renderer_opengl/present/layer.h"
#include "video_core/renderer_opengl/present/window_adapt_pass.h"
namespace OpenGL {
BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_,
Tegra::MaxwellDeviceMemoryManager& device_memory_,
StateTracker& state_tracker_, ProgramManager& program_manager_,
Device& device_)
: rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_),
program_manager(program_manager_), device(device_) {}
// Defaulted here rather than in the header: the header only forward-declares Layer and
// WindowAdaptPass, while this file includes their full definitions, which the destructor
// needs to destroy the owned members.
BlitScreen::~BlitScreen() = default;
/// Draws all `framebuffers` to the window described by `layout`.
/// Sets a fixed set of GL state, grows the layer pool so there is one Layer per framebuffer,
/// makes sure a window-adapt pass exists for the selected scaling filter, and delegates the
/// actual drawing to that pass.
void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers,
const Layout::FramebufferLayout& layout) {
// TODO: Signal state tracker about these changes
// Notify the state tracker for each category of GL state touched by this function.
// NOTE(review): presumably this marks the cached state dirty so it is re-applied for guest
// rendering afterwards - confirm against StateTracker.
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
state_tracker.NotifyCullTest();
state_tracker.NotifyDepthTest();
state_tracker.NotifyStencilTest();
state_tracker.NotifyPolygonOffset();
state_tracker.NotifyRasterizeEnable();
state_tracker.NotifyFramebufferSRGB();
state_tracker.NotifyLogicOp();
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
// Fixed pipeline state for presentation: back-face culling with clockwise front faces,
// filled polygons, all color channels writable, and every other test/feature disabled.
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
glDisable(GL_POLYGON_OFFSET_FILL);
glDisable(GL_RASTERIZER_DISCARD);
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthRangeIndexed(0, 0.0, 0.0);
// The layer pool only ever grows: add layers until there is one per framebuffer.
while (layers.size() < framebuffers.size()) {
layers.emplace_back(rasterizer, device_memory);
}
// Builds the pass on first use or when the scaling filter setting has changed.
CreateWindowAdapt();
window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout);
// TODO
// program_manager.RestoreGuestPipeline();
}
void BlitScreen::CreateWindowAdapt() {
if (window_adapt && Settings::values.scaling_filter.GetValue() == current_window_adapt) {
return;
}
current_window_adapt = Settings::values.scaling_filter.GetValue();
switch (current_window_adapt) {
case Settings::ScalingFilter::NearestNeighbor:
window_adapt = MakeNearestNeighbor(device);
break;
case Settings::ScalingFilter::Bicubic:
window_adapt = MakeBicubic(device);
break;
case Settings::ScalingFilter::Gaussian:
window_adapt = MakeGaussian(device);
break;
case Settings::ScalingFilter::ScaleForce:
window_adapt = MakeScaleForce(device);
break;
case Settings::ScalingFilter::Fsr:
case Settings::ScalingFilter::Bilinear:
default:
window_adapt = MakeBilinear(device);
break;
}
}
} // namespace OpenGL

View file

@ -0,0 +1,71 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <list>
#include <memory>
#include <span>
#include "core/hle/service/nvnflinger/pixel_format.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace Settings {
enum class ScalingFilter : u32;
}
namespace OpenGL {
class Device;
class Layer;
class ProgramManager;
class RasterizerOpenGL;
class StateTracker;
class WindowAdaptPass;
/// Structure used for storing information about the display target for the Switch screen
///
/// Every member is value-initialized so that a default-constructed instance never carries
/// indeterminate values.
struct FramebufferTextureInfo {
    GLuint display_texture{}; ///< GL handle of the texture to present
    u32 width{};              ///< Width of the image view backing the texture
    u32 height{};             ///< Height of the image view backing the texture
    u32 scaled_width{};       ///< Width after resolution scaling (equals width when unscaled)
    u32 scaled_height{};      ///< Height after resolution scaling (equals height when unscaled)
};
/// Presents guest framebuffers to the emulator window through a window-adapt pass that is
/// selected from the scaling filter setting.
class BlitScreen {
public:
/// Stores references to the given objects; they must outlive this instance.
explicit BlitScreen(RasterizerOpenGL& rasterizer,
Tegra::MaxwellDeviceMemoryManager& device_memory,
StateTracker& state_tracker, ProgramManager& program_manager,
Device& device);
~BlitScreen();
/// Draws the emulated screens to the emulator window.
void DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers,
const Layout::FramebufferLayout& layout);
private:
/// (Re)creates `window_adapt` when none exists or the scaling filter setting changed.
void CreateWindowAdapt();
RasterizerOpenGL& rasterizer;
Tegra::MaxwellDeviceMemoryManager& device_memory;
StateTracker& state_tracker;
ProgramManager& program_manager;
Device& device;
/// Scaling filter that `window_adapt` was last built for.
Settings::ScalingFilter current_window_adapt{};
std::unique_ptr<WindowAdaptPass> window_adapt;
/// One Layer per framebuffer seen so far; grown on demand by DrawScreen.
std::list<Layer> layers;
};
} // namespace OpenGL

View file

@ -1,101 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "video_core/fsr.h"
#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
using namespace FSR;
using FsrConstants = std::array<u32, 4 * 4>;
/// Compiles the three FSR programs (one vertex, two fragment) from the given GLSL sources.
FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
std::string_view fsr_rcas_source)
: fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)},
fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)},
fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} {
// Set the vertex program's uniforms at locations 0 and 1 once, up front.
// NOTE(review): these look like a scale of (1, 1) and an offset of (0, 0) - confirm against
// the vertex shader source.
glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f);
glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f);
}
FSR::~FSR() = default;
/// Runs the EASU pass into an intermediate texture and prepares state for the RCAS pass.
///
/// The intermediate texture is (re)allocated whenever its size differs from `screen`. After the
/// EASU draw, the previous draw framebuffer is restored, the intermediate texture is bound to
/// texture unit 0, and the RCAS constants are uploaded; drawing with the RCAS program is left
/// to the caller.
///
/// @param program_manager    Used to bind the FSR vertex and EASU fragment programs.
/// @param screen             Output rectangle; its width and height size the intermediate image.
/// @param input_image_width  Width of the source image.
/// @param input_image_height Height of the source image.
/// @param crop_rect          Region of the source image that is upscaled.
void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
               u32 input_image_width, u32 input_image_height,
               const Common::Rectangle<int>& crop_rect) {
    const auto output_image_width = screen.GetWidth();
    const auto output_image_height = screen.GetHeight();

    // Throw away the intermediate texture if it no longer matches the output size.
    if (fsr_intermediate_tex.handle) {
        GLint fsr_tex_width{};
        GLint fsr_tex_height{};
        glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH,
                                     &fsr_tex_width);
        glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT,
                                     &fsr_tex_height);
        if (static_cast<u32>(fsr_tex_width) != output_image_width ||
            static_cast<u32>(fsr_tex_height) != output_image_height) {
            fsr_intermediate_tex.Release();
        }
    }
    if (!fsr_intermediate_tex.handle) {
        fsr_intermediate_tex.Create(GL_TEXTURE_2D);
        glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width,
                           output_image_height);
        glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0,
                                  fsr_intermediate_tex.handle, 0);
    }

    // Remember the caller's draw framebuffer so it can be restored after the EASU pass.
    GLint old_draw_fb;
    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);

    glFrontFace(GL_CW);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle);
    glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width),
                       static_cast<GLfloat>(output_image_height));

    FsrConstants constants;
    // glProgramUniform4uiv takes the number of 4-component vectors to upload, not a byte size.
    // Passing sizeof(constants) would tell GL to read far beyond the end of the array.
    const auto num_constant_vectors = static_cast<GLsizei>(constants.size() / 4);

    FsrEasuConOffset(
        constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12,
        static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
        static_cast<f32>(input_image_width), static_cast<f32>(input_image_height),
        static_cast<f32>(output_image_width), static_cast<f32>(output_image_height),
        static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
    glProgramUniform4uiv(fsr_easu_frag.handle, 0, num_constant_vectors, std::data(constants));

    program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle);
    glDrawArrays(GL_TRIANGLES, 0, 3);

    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
    glBindTextureUnit(0, fsr_intermediate_tex.handle);

    // The sharpening slider value is divided by 100 before being handed to FsrRcasCon.
    const float sharpening =
        static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
    FsrRcasCon(constants.data(), sharpening);
    glProgramUniform4uiv(fsr_rcas_frag.handle, 0, num_constant_vectors, std::data(constants));
}
/// Creates the framebuffer object used as the EASU render target. The intermediate texture is
/// not created here; Draw allocates it on demand.
void FSR::InitBuffers() {
fsr_framebuffer.Create();
}
/// Releases both the framebuffer object and the intermediate texture.
void FSR::ReleaseBuffers() {
fsr_framebuffer.Release();
fsr_intermediate_tex.Release();
}
/// Returns the RCAS fragment program, i.e. the program whose constants Draw uploads last.
const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept {
return fsr_rcas_frag;
}
/// Reports whether the framebuffer object currently has a non-zero GL handle.
bool FSR::AreBuffersInitialized() const noexcept {
    return fsr_framebuffer.handle != 0;
}
} // namespace OpenGL

View file

@ -1,43 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string_view>
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/fsr.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class ProgramManager;
/// OpenGL FSR helper: owns the three FSR programs plus the framebuffer and intermediate
/// texture that the EASU pass renders into.
class FSR {
public:
/// Compiles the vertex, EASU and RCAS programs from the given sources.
explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
std::string_view fsr_rcas_source);
~FSR();
/// Runs the EASU pass into the intermediate texture, then binds that texture to unit 0 and
/// uploads the RCAS constants.
void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
u32 input_image_width, u32 input_image_height,
const Common::Rectangle<int>& crop_rect);
/// Creates the framebuffer object.
void InitBuffers();
/// Releases the framebuffer object and the intermediate texture.
void ReleaseBuffers();
/// Returns the RCAS fragment program.
[[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept;
/// True when the framebuffer object has a non-zero handle.
[[nodiscard]] bool AreBuffersInitialized() const noexcept;
private:
OGLFramebuffer fsr_framebuffer;
OGLProgram fsr_vertex;
OGLProgram fsr_easu_frag;
OGLProgram fsr_rcas_frag;
OGLTexture fsr_intermediate_tex;
};
} // namespace OpenGL

View file

@ -71,10 +71,10 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::MaxwellDeviceMemoryManager& device_memory_,
const Device& device_, ScreenInfo& screen_info_, const Device& device_, ProgramManager& program_manager_,
ProgramManager& program_manager_, StateTracker& state_tracker_) StateTracker& state_tracker_)
: gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_), : gpu(gpu_), device_memory(device_memory_), device(device_), program_manager(program_manager_),
program_manager(program_manager_), state_tracker(state_tracker_), state_tracker(state_tracker_),
texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
texture_cache(texture_cache_runtime, device_memory_), texture_cache(texture_cache_runtime, device_memory_),
buffer_cache_runtime(device, staging_buffer_pool), buffer_cache_runtime(device, staging_buffer_pool),
@ -739,27 +739,29 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
query_cache.InvalidateRegion(*cpu_addr, copy_size); query_cache.InvalidateRegion(*cpu_addr, copy_size);
} }
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, std::optional<FramebufferTextureInfo> RasterizerOpenGL::AccelerateDisplay(
DAddr framebuffer_addr, u32 pixel_stride) { const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) { if (framebuffer_addr == 0) {
return false; return {};
} }
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
std::scoped_lock lock{texture_cache.mutex}; std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view{ const auto [image_view, scaled] =
texture_cache.TryFindFramebufferImageView(config, framebuffer_addr)}; texture_cache.TryFindFramebufferImageView(config, framebuffer_addr);
if (!image_view) { if (!image_view) {
return false; return {};
} }
// Verify that the cached surface is the same size and format as the requested framebuffer
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
screen_info.texture.width = image_view->size.width; const auto& resolution = Settings::values.resolution_info;
screen_info.texture.height = image_view->size.height;
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); FramebufferTextureInfo info{};
return true; info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
info.width = image_view->size.width;
info.height = image_view->size.height;
info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width;
info.scaled_height = scaled ? resolution.ScaleUp(info.height) : info.height;
return info;
} }
void RasterizerOpenGL::SyncState() { void RasterizerOpenGL::SyncState() {

View file

@ -16,6 +16,7 @@
#include "video_core/engines/maxwell_dma.h" #include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/blit_image.h" #include "video_core/renderer_opengl/blit_image.h"
#include "video_core/renderer_opengl/gl_blit_screen.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h" #include "video_core/renderer_opengl/gl_fence_manager.h"
@ -37,7 +38,7 @@ class MemoryManager;
namespace OpenGL { namespace OpenGL {
struct ScreenInfo; struct FramebufferTextureInfo;
struct ShaderEntries; struct ShaderEntries;
struct BindlessSSBO { struct BindlessSSBO {
@ -76,8 +77,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerInterface,
public: public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::MaxwellDeviceMemoryManager& device_memory_,
const Device& device_, ScreenInfo& screen_info_, const Device& device_, ProgramManager& program_manager_,
ProgramManager& program_manager_, StateTracker& state_tracker_); StateTracker& state_tracker_);
~RasterizerOpenGL() override; ~RasterizerOpenGL() override;
void Draw(bool is_indexed, u32 instance_count) override; void Draw(bool is_indexed, u32 instance_count) override;
@ -122,8 +123,6 @@ public:
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) override; std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading, void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override; const VideoCore::DiskResourceLoadCallback& callback) override;
@ -144,6 +143,10 @@ public:
return true; return true;
} }
std::optional<FramebufferTextureInfo> AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr,
u32 pixel_stride);
private: private:
static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGES = 48;
@ -237,7 +240,6 @@ private:
Tegra::MaxwellDeviceMemoryManager& device_memory; Tegra::MaxwellDeviceMemoryManager& device_memory;
const Device& device; const Device& device;
ScreenInfo& screen_info;
ProgramManager& program_manager; ProgramManager& program_manager;
StateTracker& state_tracker; StateTracker& state_tracker;

View file

@ -1051,6 +1051,10 @@ void Image::Scale(bool up_scale) {
state_tracker.NotifyScissor0(); state_tracker.NotifyScissor0();
} }
/// Reports whether the Rescaled flag bit is set in this image's flags.
bool Image::IsRescaled() const {
    const auto rescaled_bits = flags & ImageFlagBits::Rescaled;
    return True(rescaled_bits);
}
bool Image::ScaleUp(bool ignore) { bool Image::ScaleUp(bool ignore) {
const auto& resolution = runtime->resolution; const auto& resolution = runtime->resolution;
if (!resolution.active) { if (!resolution.active) {

View file

@ -217,6 +217,8 @@ public:
return gl_type; return gl_type;
} }
bool IsRescaled() const;
bool ScaleUp(bool ignore = false); bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false); bool ScaleDown(bool ignore = false);

View file

@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/present_bicubic_frag.h"
#include "video_core/host_shaders/present_gaussian_frag.h"
#include "video_core/renderer_opengl/present/filters.h"
#include "video_core/renderer_opengl/present/util.h"
namespace OpenGL {
std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device) {
return std::make_unique<WindowAdaptPass>(device, CreateNearestNeighborSampler(),
HostShaders::OPENGL_PRESENT_FRAG);
}
std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device) {
return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(),
HostShaders::OPENGL_PRESENT_FRAG);
}
std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device) {
return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(),
HostShaders::PRESENT_BICUBIC_FRAG);
}
std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device) {
return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(),
HostShaders::PRESENT_GAUSSIAN_FRAG);
}
/// Builds a window adapt pass that pairs a bilinear sampler with the ScaleForce fragment shader.
/// The shader source is prefixed with a "#version 460" line before it is handed to the pass.
std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device) {
    const auto versioned_source =
        fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG);
    return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(), versioned_source);
}
} // namespace OpenGL

View file

@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "video_core/renderer_opengl/present/window_adapt_pass.h"
namespace OpenGL {
// Factory functions, one per presentation filter. Each takes the OpenGL device and returns a
// newly allocated WindowAdaptPass owned by the caller.
std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device);
} // namespace OpenGL

View file

@ -0,0 +1,98 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "video_core/fsr.h"
#include "video_core/host_shaders/ffx_a_h.h"
#include "video_core/host_shaders/ffx_fsr1_h.h"
#include "video_core/host_shaders/full_screen_triangle_vert.h"
#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h"
#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h"
#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/present/fsr.h"
#include "video_core/renderer_opengl/present/util.h"
namespace OpenGL {
// Brings the FSR namespace into scope; presumably the source of FsrEasuConOffset/FsrRcasCon
// called in FSR::Draw below (confirm against video_core/fsr.h).
using namespace FSR;
// One block of FSR shader constants: 4 * 4 u32 values.
using FsrConstants = std::array<u32, 4 * 4>;
/// Compiles the FSR programs and allocates the two intermediate RGBA16F textures.
/// @param output_width_  Width used for the EASU and RCAS output textures.
/// @param output_height_ Height used for the EASU and RCAS output textures.
FSR::FSR(u32 output_width_, u32 output_height_) : width(output_width_), height(output_height_) {
    // Resolve the nested #include directives by textual substitution, innermost first.
    std::string shared_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG};
    ReplaceInclude(shared_source, "ffx_a.h", HostShaders::FFX_A_H);
    ReplaceInclude(shared_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H);

    std::string easu_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG};
    std::string rcas_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG};
    ReplaceInclude(easu_source, "opengl_fidelityfx_fsr.frag", shared_source);
    ReplaceInclude(rcas_source, "opengl_fidelityfx_fsr.frag", shared_source);

    vert = CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER);
    easu_frag = CreateProgram(easu_source, GL_FRAGMENT_SHADER);
    rcas_frag = CreateProgram(rcas_source, GL_FRAGMENT_SHADER);

    // Vertex shader uniforms at locations 0 and 1.
    glProgramUniform2f(vert.handle, 0, 1.0f, -1.0f);
    glProgramUniform2f(vert.handle, 1, 0.0f, 1.0f);

    sampler = CreateBilinearSampler();
    framebuffer.Create();

    // Both pass outputs share the same format and dimensions.
    const auto allocate_target = [this](OGLTexture& target) {
        target.Create(GL_TEXTURE_2D);
        glTextureStorage2D(target.handle, 1, GL_RGBA16F, width, height);
    };
    allocate_target(easu_tex);
    allocate_target(rcas_tex);
}

FSR::~FSR() = default;
/// Runs the two FSR passes (EASU, then RCAS) and returns the texture holding the result.
/// @param program_manager    Used to bind the vertex/fragment program pair for each pass.
/// @param texture            Input texture sampled by the EASU pass.
/// @param input_image_width  Width of the input image.
/// @param input_image_height Height of the input image.
/// @param crop_rect          Crop rectangle; its edges are multiplied by the input dimensions to
///                           form the viewport handed to FsrEasuConOffset.
/// @return Handle of the texture written by the RCAS pass.
GLuint FSR::Draw(ProgramManager& program_manager, GLuint texture, u32 input_image_width,
                 u32 input_image_height, const Common::Rectangle<f32>& crop_rect) {
    const f32 input_width = static_cast<f32>(input_image_width);
    const f32 input_height = static_cast<f32>(input_image_height);
    const f32 output_width = static_cast<f32>(width);
    const f32 output_height = static_cast<f32>(height);

    const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_width;
    const f32 viewport_x = crop_rect.left * input_width;
    const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_height;
    const f32 viewport_y = crop_rect.top * input_height;

    FsrConstants easu_con{};
    FsrConstants rcas_con{};
    FsrEasuConOffset(easu_con.data() + 0, easu_con.data() + 4, easu_con.data() + 8,
                     easu_con.data() + 12, viewport_width, viewport_height, input_width,
                     input_height, output_width, output_height, viewport_x, viewport_y);

    // The sharpening slider value is divided by 100 before being passed on.
    const float sharpening =
        static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
    FsrRcasCon(rcas_con.data(), sharpening);

    // glProgramUniform4uiv expects the number of uvec4 elements to load, not a byte count.
    // Passing sizeof() here would ask the driver to read 64 uvec4s from a 16-u32 array.
    const GLsizei easu_vec4_count = static_cast<GLsizei>(easu_con.size() / 4);
    const GLsizei rcas_vec4_count = static_cast<GLsizei>(rcas_con.size() / 4);
    glProgramUniform4uiv(easu_frag.handle, 0, easu_vec4_count, easu_con.data());
    glProgramUniform4uiv(rcas_frag.handle, 0, rcas_vec4_count, rcas_con.data());

    glFrontFace(GL_CW);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);

    // Pass 1 (EASU): input texture -> easu_tex
    glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, easu_tex.handle, 0);
    glViewportIndexedf(0, 0.0f, 0.0f, output_width, output_height);
    program_manager.BindPresentPrograms(vert.handle, easu_frag.handle);
    glBindTextureUnit(0, texture);
    glBindSampler(0, sampler.handle);
    glDrawArrays(GL_TRIANGLES, 0, 3);

    // Pass 2 (RCAS): easu_tex -> rcas_tex
    glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, rcas_tex.handle, 0);
    program_manager.BindPresentPrograms(vert.handle, rcas_frag.handle);
    glBindTextureUnit(0, easu_tex.handle);
    glDrawArrays(GL_TRIANGLES, 0, 3);

    return rcas_tex.handle;
}
/// Returns true when the given screen rectangle's width or height differs from the output size
/// this instance was constructed with.
bool FSR::NeedsRecreation(const Common::Rectangle<u32>& screen) {
    const bool size_matches = screen.GetWidth() == width && screen.GetHeight() == height;
    return !size_matches;
}
} // namespace OpenGL

View file

@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string_view>
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/fsr.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class ProgramManager;
/// OpenGL FSR upscaler: owns the programs, sampler, framebuffer and intermediate textures used
/// by its Draw call.
class FSR {
public:
/// Creates the resources for a fixed output size.
explicit FSR(u32 output_width, u32 output_height);
~FSR();
/// Runs the EASU and RCAS passes on `texture` and returns the handle of the output texture.
GLuint Draw(ProgramManager& program_manager, GLuint texture, u32 input_image_width,
u32 input_image_height, const Common::Rectangle<f32>& crop_rect);
/// Returns true when `screen` no longer matches the output size given at construction.
bool NeedsRecreation(const Common::Rectangle<u32>& screen);
private:
// Output dimensions, fixed at construction
const u32 width;
const u32 height;
OGLFramebuffer framebuffer;
OGLSampler sampler;
OGLProgram vert;
OGLProgram easu_frag;
OGLProgram rcas_frag;
// Render targets of the EASU and RCAS passes respectively
OGLTexture easu_tex;
OGLTexture rcas_tex;
};
} // namespace OpenGL

View file

@ -0,0 +1,41 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/present/fxaa.h"
#include "video_core/renderer_opengl/present/util.h"
namespace OpenGL {
/// Compiles the FXAA programs and allocates an RGBA16F render target of the given size.
/// @param width  Width of the output texture.
/// @param height Height of the output texture.
FXAA::FXAA(u32 width, u32 height) {
    vert_shader = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
    frag_shader = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);

    sampler = CreateBilinearSampler();

    // The output texture stays attached to the framebuffer for the lifetime of this object.
    framebuffer.Create();
    texture.Create(GL_TEXTURE_2D);
    const auto output_handle = texture.handle;
    glTextureStorage2D(output_handle, 1, GL_RGBA16F, width, height);
    glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, output_handle, 0);
}

FXAA::~FXAA() = default;
/// Renders the FXAA pass over `input_texture` into the internal render target.
/// @param program_manager Used to bind the FXAA vertex/fragment programs.
/// @param input_texture   Texture sampled from texture unit 0.
/// @return Handle of the internal output texture.
GLuint FXAA::Draw(ProgramManager& program_manager, GLuint input_texture) {
    // Switch to counter-clockwise front faces for the draw; clockwise is set again afterwards.
    glFrontFace(GL_CCW);

    program_manager.BindPresentPrograms(vert_shader.handle, frag_shader.handle);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
    glBindTextureUnit(0, input_texture);
    glBindSampler(0, sampler.handle);
    glDrawArrays(GL_TRIANGLES, 0, 3);

    glFrontFace(GL_CW);

    const GLuint output_texture = texture.handle;
    return output_texture;
}
} // namespace OpenGL

View file

@ -0,0 +1,27 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class ProgramManager;
/// OpenGL FXAA pass: owns its programs, sampler, framebuffer and output texture.
class FXAA {
public:
/// Creates the resources for an output texture of the given size.
explicit FXAA(u32 width, u32 height);
~FXAA();
/// Draws `input_texture` through the FXAA programs and returns the output texture handle.
GLuint Draw(ProgramManager& program_manager, GLuint input_texture);
private:
OGLProgram vert_shader;
OGLProgram frag_shader;
OGLSampler sampler;
OGLFramebuffer framebuffer;
// Render target attached to `framebuffer` in the constructor
OGLTexture texture;
};
} // namespace OpenGL

View file

@ -0,0 +1,215 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/framebuffer_config.h"
#include "video_core/renderer_opengl/gl_blit_screen.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/present/fsr.h"
#include "video_core/renderer_opengl/present/fxaa.h"
#include "video_core/renderer_opengl/present/layer.h"
#include "video_core/renderer_opengl/present/present_uniforms.h"
#include "video_core/renderer_opengl/present/smaa.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
/// Stores the rasterizer and device memory references and creates a 1x1 placeholder texture.
Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
    : rasterizer(rasterizer_), device_memory(device_memory_) {
    // Start with a 1x1 RGBA8 texture; ConfigureFramebufferTexture replaces it later.
    OGLTexture& placeholder = framebuffer_texture.resource;
    placeholder.Create(GL_TEXTURE_2D);
    glTextureStorage2D(placeholder.handle, 1, GL_RGBA8, 1, 1);

    // Fill the single texel with zeros
    const u8 zero_texel[4] = {0, 0, 0, 0};
    glClearTexImage(placeholder.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, zero_texel);
}

Layer::~Layer() = default;
/// Prepares this layer for drawing: fetches the framebuffer texture, applies the configured
/// anti-aliasing and FSR passes, and fills in the matrix and quad vertices for the final draw.
/// @param out_matrix      Receives the orthographic matrix for the layout size.
/// @param out_vertices    Receives the four quad vertices covering layout.screen.
/// @param program_manager Forwarded to the post-processing passes.
/// @param framebuffer     Guest framebuffer configuration to present.
/// @param layout          Window layout providing the output size and screen rectangle.
/// @return Texture handle to sample from when drawing the quad.
GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
                            std::array<ScreenRectVertex, 4>& out_vertices,
                            ProgramManager& program_manager,
                            const Tegra::FramebufferConfig& framebuffer,
                            const Layout::FramebufferLayout& layout) {
    const FramebufferTextureInfo info = PrepareRenderTarget(framebuffer);
    auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height);
    GLuint texture = info.display_texture;

    const auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
    if (anti_aliasing != Settings::AntiAliasing::None) {
        // Restrict scissor and viewport to the scaled framebuffer size for the AA pass.
        glEnablei(GL_SCISSOR_TEST, 0);
        const auto scaled_width =
            Settings::values.resolution_info.ScaleUp(framebuffer_texture.width);
        const auto scaled_height =
            Settings::values.resolution_info.ScaleUp(framebuffer_texture.height);
        glScissorIndexed(0, 0, 0, scaled_width, scaled_height);
        glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(scaled_width),
                           static_cast<GLfloat>(scaled_height));

        if (anti_aliasing == Settings::AntiAliasing::Fxaa) {
            CreateFXAA();
            texture = fxaa->Draw(program_manager, info.display_texture);
        } else {
            // SMAA, and any other non-None value, goes through the SMAA pass.
            CreateSMAA();
            texture = smaa->Draw(program_manager, info.display_texture);
        }
    }

    glDisablei(GL_SCISSOR_TEST, 0);

    const bool use_fsr =
        Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr;
    if (use_fsr) {
        // (Re)build the FSR pass whenever the screen rectangle size changes.
        if (!fsr || fsr->NeedsRecreation(layout.screen)) {
            fsr = std::make_unique<FSR>(layout.screen.GetWidth(), layout.screen.GetHeight());
        }

        texture = fsr->Draw(program_manager, texture, info.scaled_width, info.scaled_height, crop);
        // The crop is passed to the FSR pass above, so the quad samples the full texture.
        crop = {0, 0, 1, 1};
    }

    out_matrix =
        MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));

    // Map the screen rectangle corners to the cropped texture coordinates.
    const auto& screen = layout.screen;
    const auto left = screen.left;
    const auto top = screen.top;
    const auto right = left + screen.GetWidth();
    const auto bottom = top + screen.GetHeight();

    out_vertices[0] = ScreenRectVertex(left, top, crop.left, crop.top);
    out_vertices[1] = ScreenRectVertex(right, top, crop.right, crop.top);
    out_vertices[2] = ScreenRectVertex(left, bottom, crop.left, crop.bottom);
    out_vertices[3] = ScreenRectVertex(right, bottom, crop.right, crop.bottom);

    return texture;
}
/// Makes sure the backing texture matches the framebuffer configuration, then loads the
/// framebuffer contents and returns the texture information to present.
FramebufferTextureInfo Layer::PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer) {
    const bool texture_is_current =
        framebuffer_texture.width == static_cast<GLsizei>(framebuffer.width) &&
        framebuffer_texture.height == static_cast<GLsizei>(framebuffer.height) &&
        framebuffer_texture.pixel_format == framebuffer.pixel_format &&
        !gl_framebuffer_data.empty();

    if (!texture_is_current) {
        // Size or format changed (or nothing was allocated yet): reallocate the texture.
        // This is expected to be rare and therefore not a performance concern.
        ConfigureFramebufferTexture(framebuffer);
    }

    // Load the framebuffer from memory if needed
    return LoadFBToScreenInfo(framebuffer);
}
/// Obtains a texture for the given framebuffer. The rasterizer's accelerated path is tried
/// first; if it yields nothing, the framebuffer is unswizzled from device memory and uploaded
/// into this layer's own texture.
/// @param framebuffer Guest framebuffer configuration to load.
/// @return Texture handle plus unscaled and scaled dimensions.
FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
    const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;

    if (const auto accelerated =
            rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
        return *accelerated;
    }

    // Fall back to this layer's own permanent texture
    FramebufferTextureInfo info{};
    info.display_texture = framebuffer_texture.resource.handle;
    info.width = framebuffer.width;
    info.height = framebuffer.height;
    info.scaled_width = framebuffer.width;
    info.scaled_height = framebuffer.height;

    // TODO(Rodrigo): Read this from HLE
    constexpr u32 block_height_log2 = 4;
    const auto surface_format =
        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
    const u32 bytes_per_pixel = VideoCore::Surface::BytesPerBlock(surface_format);
    const u64 swizzled_size = Tegra::Texture::CalculateSize(
        true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0);

    const u8* const guest_data = device_memory.GetPointer<u8>(framebuffer_addr);
    const std::span<const u8> swizzled(guest_data, swizzled_size);
    Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, swizzled, bytes_per_pixel,
                                     framebuffer.width, framebuffer.height, 1, block_height_log2,
                                     0);

    // Upload from client memory, using the framebuffer stride as the row length
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));

    // Update existing texture
    // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
    //       they differ from the LCD resolution.
    // TODO: Applications could theoretically crash yuzu here by specifying too large
    //       framebuffer sizes. We should make sure that this cannot happen.
    glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width,
                        framebuffer.height, framebuffer_texture.gl_format,
                        framebuffer_texture.gl_type, gl_framebuffer_data.data());

    // Restore the default row length
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

    return info;
}
/// Reallocates the layer texture and staging buffer for a new framebuffer size or format, and
/// drops the anti-aliasing passes so they are rebuilt at the new size.
/// @param framebuffer Guest framebuffer configuration to match.
void Layer::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer) {
    framebuffer_texture.width = framebuffer.width;
    framebuffer_texture.height = framebuffer.height;
    framebuffer_texture.pixel_format = framebuffer.pixel_format;

    const auto surface_format =
        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
    const u32 bytes_per_pixel = VideoCore::Surface::BytesPerBlock(surface_format);
    gl_framebuffer_data.resize(framebuffer_texture.width * framebuffer_texture.height *
                               bytes_per_pixel);

    // RGBA8 is used for Rgba8888 and for every format without a dedicated mapping.
    // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
    //                   static_cast<u32>(framebuffer.pixel_format));
    GLint internal_format = GL_RGBA8;
    framebuffer_texture.gl_format = GL_RGBA;
    framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;

    if (framebuffer.pixel_format == Service::android::PixelFormat::Rgb565) {
        internal_format = GL_RGB565;
        framebuffer_texture.gl_format = GL_RGB;
        framebuffer_texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
    }

    // Replace the texture object with one that has the new storage.
    OGLTexture& resource = framebuffer_texture.resource;
    resource.Release();
    resource.Create(GL_TEXTURE_2D);
    glTextureStorage2D(resource.handle, 1, internal_format, framebuffer_texture.width,
                       framebuffer_texture.height);

    fxaa.reset();
    smaa.reset();
}
/// Releases any SMAA pass and creates the FXAA pass if it does not exist yet, sized to the
/// scaled-up framebuffer texture dimensions.
void Layer::CreateFXAA() {
    smaa.reset();
    if (fxaa) {
        return;
    }

    const auto scaled_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width);
    const auto scaled_height =
        Settings::values.resolution_info.ScaleUp(framebuffer_texture.height);
    fxaa = std::make_unique<FXAA>(scaled_width, scaled_height);
}
/// Releases any FXAA pass and creates the SMAA pass if it does not exist yet, sized to the
/// scaled-up framebuffer texture dimensions.
void Layer::CreateSMAA() {
    fxaa.reset();
    if (smaa) {
        return;
    }

    const auto scaled_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width);
    const auto scaled_height =
        Settings::values.resolution_info.ScaleUp(framebuffer_texture.height);
    smaa = std::make_unique<SMAA>(scaled_width, scaled_height);
}
} // namespace OpenGL

View file

@ -0,0 +1,80 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <vector>
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace Layout {
struct FramebufferLayout;
}
// Opaque declaration of the pixel format enum, so the full definition need not be included.
namespace Service::android {
enum class PixelFormat : u32;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace OpenGL {
struct FramebufferTextureInfo;
class FSR;
class FXAA;
class ProgramManager;
class RasterizerOpenGL;
class SMAA;
/// Structure used for storing information about the textures for the Switch screen.
/// All scalar members are value-initialized so that the first size/format comparison against an
/// incoming framebuffer reads defined values rather than indeterminate ones.
struct TextureInfo {
    OGLTexture resource;
    GLsizei width{};
    GLsizei height{};
    GLenum gl_format{};
    GLenum gl_type{};
    Service::android::PixelFormat pixel_format{};
};
struct ScreenRectVertex;
/// One presentable layer: owns the texture a guest framebuffer is loaded into, plus the optional
/// FSR/FXAA/SMAA passes applied to it before the final draw.
class Layer {
public:
explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory);
~Layer();
/// Prepares the layer for drawing: fills `out_matrix` and `out_vertices` and returns the handle
/// of the texture to sample for this layer.
GLuint ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
std::array<ScreenRectVertex, 4>& out_vertices,
ProgramManager& program_manager,
const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout& layout);
private:
/// Loads framebuffer from emulated memory into the active OpenGL texture.
FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
/// Reconfigures the texture when the size or format changed, then loads the framebuffer.
FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer);
/// Reallocates the texture and staging buffer for the given framebuffer configuration.
void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer);
/// Create the respective anti-aliasing pass on demand; each releases the other pass.
void CreateFXAA();
void CreateSMAA();
private:
RasterizerOpenGL& rasterizer;
Tegra::MaxwellDeviceMemoryManager& device_memory;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
/// Display information for Switch screen
TextureInfo framebuffer_texture;
/// Optional post-processing passes, created lazily
std::unique_ptr<FSR> fsr;
std::unique_ptr<FXAA> fxaa;
std::unique_ptr<SMAA> smaa;
};
} // namespace OpenGL

View file

@ -0,0 +1,43 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
// Vertex attribute locations used for ScreenRectVertex::position and ::tex_coord
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
// Uniform location the 3x2 model-view matrix is uploaded to
constexpr GLint ModelViewMatrixLocation = 0;
/// Vertex of the presentation quad: a 2D position and a 2D texture coordinate.
struct ScreenRectVertex {
    constexpr ScreenRectVertex() = default;

    /// @param x,y Position components, converted to floating point.
    /// @param u,v Texture coordinate.
    constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
        : position{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}, tex_coord{u, v} {}

    std::array<GLfloat, 2> position{};
    std::array<GLfloat, 2> tex_coord{};
};
/**
* Defines a 1:1 pixel orthographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
*
* The projection part of the matrix is trivial, hence these operations are represented
* by a 3x2 matrix.
*/
static inline std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
// clang-format off
matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
// Last matrix row is implicitly assumed to be [0, 0, 1].
// clang-format on
return matrix;
}
} // namespace OpenGL

View file

@ -0,0 +1,102 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/host_shaders/opengl_smaa_glsl.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h"
#include "video_core/host_shaders/smaa_edge_detection_frag.h"
#include "video_core/host_shaders/smaa_edge_detection_vert.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/present/smaa.h"
#include "video_core/renderer_opengl/present/util.h"
#include "video_core/smaa_area_tex.h"
#include "video_core/smaa_search_tex.h"
namespace OpenGL {
/// Compiles the six SMAA programs, uploads the area/search lookup textures and allocates the
/// intermediate and output render targets.
/// @param width  Width of the edges, blend and output textures.
/// @param height Height of the edges, blend and output textures.
SMAA::SMAA(u32 width, u32 height) {
    // Every stage source includes "opengl_smaa.glsl"; splice it in before compiling.
    const auto compile_stage = [](std::string_view stage_source, GLenum stage) {
        std::string full_source{stage_source};
        ReplaceInclude(full_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL);
        return CreateProgram(full_source, stage);
    };

    edge_detection_vert = compile_stage(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER);
    edge_detection_frag =
        compile_stage(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER);
    blending_weight_calculation_vert =
        compile_stage(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER);
    blending_weight_calculation_frag =
        compile_stage(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER);
    neighborhood_blending_vert =
        compile_stage(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER);
    neighborhood_blending_frag =
        compile_stage(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER);

    // Upload the lookup tables from client memory with tightly packed rows.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

    area_tex.Create(GL_TEXTURE_2D);
    glTextureStorage2D(area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT);
    glTextureSubImage2D(area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG,
                        GL_UNSIGNED_BYTE, areaTexBytes);

    search_tex.Create(GL_TEXTURE_2D);
    glTextureStorage2D(search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT);
    glTextureSubImage2D(search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED,
                        GL_UNSIGNED_BYTE, searchTexBytes);

    // Intermediate targets for the first two passes
    edges_tex.Create(GL_TEXTURE_2D);
    glTextureStorage2D(edges_tex.handle, 1, GL_RG16F, width, height);

    blend_tex.Create(GL_TEXTURE_2D);
    glTextureStorage2D(blend_tex.handle, 1, GL_RGBA16F, width, height);

    sampler = CreateBilinearSampler();

    // Final output target, attached to the framebuffer up front
    framebuffer.Create();
    texture.Create(GL_TEXTURE_2D);
    glTextureStorage2D(texture.handle, 1, GL_RGBA16F, width, height);
    glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, texture.handle, 0);
}

SMAA::~SMAA() = default;
/// Runs the three SMAA passes (edge detection, blending weight calculation, neighborhood
/// blending) over `input_texture`.
/// @param program_manager Used to bind the program pair of each pass.
/// @param input_texture   Texture to anti-alias.
/// @return Handle of the internal output texture.
GLuint SMAA::Draw(ProgramManager& program_manager, GLuint input_texture) {
    const GLuint fbo = framebuffer.handle;

    glClearColor(0, 0, 0, 0);
    glFrontFace(GL_CCW);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
    // The same sampler is used on texture units 0 through 2.
    for (GLuint unit = 0; unit < 3; ++unit) {
        glBindSampler(unit, sampler.handle);
    }

    // Pass 1: edge detection, input -> edges_tex
    glBindTextureUnit(0, input_texture);
    glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, edges_tex.handle, 0);
    glClear(GL_COLOR_BUFFER_BIT);
    program_manager.BindPresentPrograms(edge_detection_vert.handle, edge_detection_frag.handle);
    glDrawArrays(GL_TRIANGLES, 0, 3);

    // Pass 2: blending weights, edges + lookup tables -> blend_tex
    glBindTextureUnit(0, edges_tex.handle);
    glBindTextureUnit(1, area_tex.handle);
    glBindTextureUnit(2, search_tex.handle);
    glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, blend_tex.handle, 0);
    glClear(GL_COLOR_BUFFER_BIT);
    program_manager.BindPresentPrograms(blending_weight_calculation_vert.handle,
                                        blending_weight_calculation_frag.handle);
    glDrawArrays(GL_TRIANGLES, 0, 3);

    // Pass 3: neighborhood blending, input + blend weights -> output texture
    glBindTextureUnit(0, input_texture);
    glBindTextureUnit(1, blend_tex.handle);
    glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, texture.handle, 0);
    program_manager.BindPresentPrograms(neighborhood_blending_vert.handle,
                                        neighborhood_blending_frag.handle);
    glClear(GL_COLOR_BUFFER_BIT);
    glDrawArrays(GL_TRIANGLES, 0, 3);

    glFrontFace(GL_CW);

    return texture.handle;
}
} // namespace OpenGL

View file

@ -0,0 +1,35 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class ProgramManager;
/// OpenGL SMAA pass: owns the programs, lookup textures, intermediate targets and output
/// texture used by its three-pass Draw call.
class SMAA {
public:
/// Creates the resources for render targets of the given size.
explicit SMAA(u32 width, u32 height);
~SMAA();
/// Runs the three passes over `input_texture` and returns the output texture handle.
GLuint Draw(ProgramManager& program_manager, GLuint input_texture);
private:
// One vertex/fragment program pair per pass
OGLProgram edge_detection_vert;
OGLProgram blending_weight_calculation_vert;
OGLProgram neighborhood_blending_vert;
OGLProgram edge_detection_frag;
OGLProgram blending_weight_calculation_frag;
OGLProgram neighborhood_blending_frag;
// Lookup tables uploaded in the constructor
OGLTexture area_tex;
OGLTexture search_tex;
// Outputs of the first and second pass
OGLTexture edges_tex;
OGLTexture blend_tex;
OGLSampler sampler;
OGLFramebuffer framebuffer;
// Output of the final pass
OGLTexture texture;
};
} // namespace OpenGL

View file

@ -0,0 +1,43 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string>
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
/// Replaces the first `#include "<include_name>"` directive in `shader_source` with
/// `include_content`.
/// @param shader_source   Shader text to modify in place.
/// @param include_name    File name as it appears between the quotes of the directive.
/// @param include_content Text substituted for the whole directive.
/// The directive is asserted to exist; if the assertion does not abort, the source is left
/// untouched instead of letting std::string::replace throw std::out_of_range.
static inline void ReplaceInclude(std::string& shader_source, std::string_view include_name,
                                  std::string_view include_content) {
    const std::string include_string = fmt::format("#include \"{}\"", include_name);
    const std::size_t pos = shader_source.find(include_string);
    ASSERT(pos != std::string::npos);
    if (pos == std::string::npos) {
        return;
    }
    shader_source.replace(pos, include_string.size(), include_content);
}
/// Creates a sampler with linear min/mag filtering and clamp-to-edge wrapping on S, T and R.
static inline OGLSampler CreateBilinearSampler() {
    OGLSampler bilinear;
    bilinear.Create();
    const auto handle = bilinear.handle;

    // Filtering
    glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    // Wrapping
    glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);

    return bilinear;
}
/// Creates a sampler with nearest min/mag filtering and clamp-to-edge wrapping on S, T and R.
static inline OGLSampler CreateNearestNeighborSampler() {
    OGLSampler nearest;
    nearest.Create();
    const auto handle = nearest.handle;

    // Filtering
    glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    // Wrapping
    glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);

    return nearest;
}
} // namespace OpenGL

View file

@ -0,0 +1,103 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "video_core/framebuffer_config.h"
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/present/layer.h"
#include "video_core/renderer_opengl/present/present_uniforms.h"
#include "video_core/renderer_opengl/present/window_adapt_pass.h"
namespace OpenGL {
/// Compiles the present programs and allocates the vertex buffer used to draw layer quads.
/// @param device_     Device queried for unified vertex buffer memory support.
/// @param sampler_    Sampler to use when drawing; ownership is taken by this pass.
/// @param frag_source Source of the fragment shader used for presentation.
WindowAdaptPass::WindowAdaptPass(const Device& device_, OGLSampler&& sampler_,
                                 std::string_view frag_source)
    : device(device_), sampler(std::move(sampler_)) {
    vert = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
    frag = CreateProgram(frag_source, GL_FRAGMENT_SHADER);

    // Allocate storage for one quad (four vertices); contents are streamed per draw.
    vertex_buffer.Create();
    glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);

    if (!device.HasVertexBufferUnifiedMemory()) {
        return;
    }

    // With unified vertex buffer memory, make the buffer resident and query its GPU address.
    glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
    glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
                                     &vertex_buffer_address);
}

WindowAdaptPass::~WindowAdaptPass() = default;
/// Draws every layer as a textured quad into the framebuffer that is bound on entry.
/// @param program_manager Used by the layers and to bind the present programs.
/// @param layers          Layers to draw; consumed in order, one per framebuffer entry.
/// @param framebuffers    Framebuffer configurations, one per layer to draw.
/// @param layout          Window layout providing the viewport size and screen rectangle.
void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers,
                                        std::span<const Tegra::FramebufferConfig> framebuffers,
                                        const Layout::FramebufferLayout& layout) {
    // Layer preparation may rebind framebuffers, so remember the current bindings.
    GLint saved_read_fb;
    GLint saved_draw_fb;
    glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &saved_read_fb);
    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &saved_draw_fb);

    const size_t layer_count = framebuffers.size();
    std::vector<GLuint> textures(layer_count);
    std::vector<std::array<GLfloat, 3 * 2>> matrices(layer_count);
    std::vector<std::array<ScreenRectVertex, 4>> vertices(layer_count);

    // Let each layer run its own passes and report what to draw.
    auto layer_it = layers.begin();
    for (size_t index = 0; index < layer_count; ++index, ++layer_it) {
        textures[index] = layer_it->ConfigureDraw(matrices[index], vertices[index],
                                                  program_manager, framebuffers[index], layout);
    }

    glBindFramebuffer(GL_READ_FRAMEBUFFER, saved_read_fb);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, saved_draw_fb);

    program_manager.BindPresentPrograms(vert.handle, frag.handle);

    glDisable(GL_FRAMEBUFFER_SRGB);
    glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
                       static_cast<GLfloat>(layout.height));

    // Describe the ScreenRectVertex layout on vertex binding 0.
    glEnableVertexAttribArray(PositionLocation);
    glEnableVertexAttribArray(TexCoordLocation);
    glVertexAttribDivisor(PositionLocation, 0);
    glVertexAttribDivisor(TexCoordLocation, 0);
    glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
                         offsetof(ScreenRectVertex, position));
    glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
                         offsetof(ScreenRectVertex, tex_coord));
    glVertexAttribBinding(PositionLocation, 0);
    glVertexAttribBinding(TexCoordLocation, 0);

    if (!device.HasVertexBufferUnifiedMemory()) {
        glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
    } else {
        // Unified memory path: source the quad through the buffer's GPU address.
        glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
                               sizeof(std::array<ScreenRectVertex, 4>));
    }

    glBindSampler(0, sampler.handle);

    // Clear to the configured background color before drawing the layers.
    const auto clear_red = Settings::values.bg_red.GetValue() / 255.0f;
    const auto clear_green = Settings::values.bg_green.GetValue() / 255.0f;
    const auto clear_blue = Settings::values.bg_blue.GetValue() / 255.0f;
    glClearColor(clear_red, clear_green, clear_blue, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT);

    // Draw one quad per layer, re-uploading the matrix and vertices each time.
    for (size_t index = 0; index < layer_count; ++index) {
        const auto& quad = vertices[index];
        glBindTextureUnit(0, textures[index]);
        glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE,
                                    matrices[index].data());
        glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(quad), quad.data());
        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    }
}
} // namespace OpenGL

View file

@ -0,0 +1,47 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <list>
#include <span>
#include <string_view>

#include "common/math_util.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace OpenGL {
class Device;
class Layer;
class ProgramManager;
/// Presentation pass that draws the configured layers as textured quads into the bound draw
/// framebuffer, using the vertex/fragment programs and sampler held by this object.
class WindowAdaptPass final {
public:
/// @param device      OpenGL device wrapper; stored by reference, so it must outlive this object.
/// @param sampler     Sampler bound to texture unit 0 while drawing; moved into this object.
/// @param frag_source Fragment shader source -- NOTE(review): presumably compiled into `frag`;
///                    the constructor body is not fully visible here, confirm.
explicit WindowAdaptPass(const Device& device, OGLSampler&& sampler,
std::string_view frag_source);
/// Declared here and defined out of line in the implementation file.
~WindowAdaptPass();
/// Clears the bound draw framebuffer to the background color and draws one quad per entry of
/// `framebuffers`, pairing each with the layer at the same position in `layers`.
/// `layers` must contain at least `framebuffers.size()` entries.
void DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers,
std::span<const Tegra::FramebufferConfig> framebuffers,
const Layout::FramebufferLayout& layout);
private:
// Queried for unified vertex buffer memory support when binding the vertex buffer
const Device& device;
// Sampler bound to texture unit 0 for the layer draws
OGLSampler sampler;
// Vertex program; receives the per-layer model-view matrix uniform
OGLProgram vert;
// Fragment program bound together with `vert` for presentation
OGLProgram frag;
// Holds the four vertices of the quad currently being drawn
OGLBuffer vertex_buffer;
// GPU address of the vertex buffer
GLuint64EXT vertex_buffer_address = 0;
};
} // namespace OpenGL

View file

@ -16,68 +16,15 @@
#include "core/core_timing.h" #include "core/core_timing.h"
#include "core/frontend/emu_window.h" #include "core/frontend/emu_window.h"
#include "core/telemetry_session.h" #include "core/telemetry_session.h"
#include "video_core/host_shaders/ffx_a_h.h" #include "video_core/renderer_opengl/gl_blit_screen.h"
#include "video_core/host_shaders/ffx_fsr1_h.h"
#include "video_core/host_shaders/full_screen_triangle_vert.h"
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h"
#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h"
#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/host_shaders/opengl_smaa_glsl.h"
#include "video_core/host_shaders/present_bicubic_frag.h"
#include "video_core/host_shaders/present_gaussian_frag.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h"
#include "video_core/host_shaders/smaa_edge_detection_frag.h"
#include "video_core/host_shaders/smaa_edge_detection_vert.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h"
#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/smaa_area_tex.h"
#include "video_core/smaa_search_tex.h"
#include "video_core/textures/decoders.h" #include "video_core/textures/decoders.h"
namespace OpenGL { namespace OpenGL {
namespace { namespace {
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
/// Vertex of the screen quad: a 2D position and the texture coordinate sampled at it.
struct ScreenRectVertex {
    std::array<GLfloat, 2> position;
    std::array<GLfloat, 2> tex_coord;

    /// Builds a vertex from integer coordinates (converted to float) and a (u, v) pair.
    constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
        : position{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}, tex_coord{u, v} {}
};
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
*
* The projection part of the matrix is trivial, hence these operations are represented
* by a 3x2 matrix.
*/
std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
// clang-format off
matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
// Last matrix row is implicitly assumed to be [0, 0, 1].
// clang-format on
return matrix;
}
const char* GetSource(GLenum source) { const char* GetSource(GLenum source) {
switch (source) { switch (source) {
case GL_DEBUG_SOURCE_API: case GL_DEBUG_SOURCE_API:
@ -148,15 +95,13 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_}, emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_},
state_tracker{}, program_manager{device}, state_tracker{}, program_manager{device},
rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager, rasterizer(emu_window, gpu, device_memory, device, program_manager, state_tracker) {
state_tracker) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT); glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr); glDebugMessageCallback(DebugHandler, nullptr);
} }
AddTelemetryFields(); AddTelemetryFields();
InitOpenGLObjects();
// Initialize default attributes to match hardware's disabled attributes // Initialize default attributes to match hardware's disabled attributes
GLint max_attribs{}; GLint max_attribs{};
@ -168,27 +113,27 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
} }
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
// Enable unified vertex attributes when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) { if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
&vertex_buffer_address);
} }
blit_screen = std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker,
program_manager, device);
} }
RendererOpenGL::~RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { void RendererOpenGL::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) {
if (!framebuffer) { if (framebuffers.empty()) {
return; return;
} }
PrepareRendertarget(framebuffer);
RenderScreenshot(); RenderScreenshot(framebuffers);
state_tracker.BindFramebuffer(0); state_tracker.BindFramebuffer(0);
DrawScreen(emu_window.GetFramebufferLayout()); blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout());
++m_current_frame; ++m_current_frame;
@ -199,172 +144,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
render_window.OnFrameDisplayed(); render_window.OnFrameDisplayed();
} }
/// Makes the screen texture match the given framebuffer and loads the framebuffer into it.
/// Does nothing when no framebuffer is supplied.
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
    if (framebuffer == nullptr) {
        return;
    }
    const Tegra::FramebufferConfig& config = *framebuffer;
    const TextureInfo& current = screen_info.texture;

    const bool size_changed = current.width != static_cast<GLsizei>(config.width) ||
                              current.height != static_cast<GLsizei>(config.height);
    const bool format_changed = current.pixel_format != config.pixel_format;

    // The texture is recreated whenever the framebuffer dimensions or format change, or when the
    // staging buffer has not been sized yet. This is expected to be rare, so it should not be a
    // performance problem.
    if (size_changed || format_changed || gl_framebuffer_data.empty()) {
        ConfigureFramebufferTexture(screen_info.texture, config);
    }

    // Bring the framebuffer contents into the screen info for the upcoming draw
    LoadFBToScreenInfo(config);
}
/// Loads the framebuffer into the screen info. The rasterizer is asked to accelerate the display
/// first; if it does not, the framebuffer is unswizzled from guest memory and uploaded into the
/// renderer's own screen texture.
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
    // Keep orientation, crop and size around; DrawScreen reads them later
    framebuffer_transform_flags = framebuffer.transform_flags;
    framebuffer_crop_rect = framebuffer.crop_rect;
    framebuffer_width = framebuffer.width;
    framebuffer_height = framebuffer.height;

    const VAddr source_addr{framebuffer.address + framebuffer.offset};
    const bool accelerated =
        rasterizer.AccelerateDisplay(framebuffer, source_addr, framebuffer.stride);
    screen_info.was_accelerated = accelerated;
    if (accelerated) {
        return;
    }

    // Software path: present from the renderer's own permanent texture again
    const GLuint own_texture = screen_info.texture.resource.handle;
    screen_info.display_texture = own_texture;

    // TODO(Rodrigo): Read this from HLE
    constexpr u32 block_height_log2 = 4;
    const auto surface_format =
        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
    const u32 bytes_per_pixel = VideoCore::Surface::BytesPerBlock(surface_format);
    const u64 swizzled_size = Tegra::Texture::CalculateSize(
        true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0);

    const u8* const guest_data = device_memory.GetPointer<u8>(source_addr);
    const std::span<const u8> swizzled(guest_data, swizzled_size);
    Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, swizzled, bytes_per_pixel,
                                     framebuffer.width, framebuffer.height, 1, block_height_log2,
                                     0);

    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));

    // Upload into the existing texture
    // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
    // they differ from the LCD resolution.
    // TODO: Applications could theoretically crash yuzu here by specifying too large
    // framebuffer sizes. We should make sure that this cannot happen.
    glTextureSubImage2D(own_texture, 0, 0, 0, framebuffer.width, framebuffer.height,
                        screen_info.texture.gl_format, screen_info.texture.gl_type,
                        gl_framebuffer_data.data());

    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
/**
 * Fills level 0 of the given texture with a single solid RGBA color.
 *
 * @param color_r Red component.
 * @param color_g Green component.
 * @param color_b Blue component.
 * @param color_a Alpha component.
 * @param texture Texture whose storage is cleared to the color.
 */
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                const TextureInfo& texture) {
    // GL_RGBA with GL_UNSIGNED_BYTE reads one byte per component in R, G, B, A memory order, so
    // the components must be laid out in exactly that order (not reversed as for packed types).
    const u8 clear_color[4] = {color_r, color_g, color_b, color_a};
    glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, clear_color);
}
/// Creates the persistent OpenGL objects used for presentation: the anti-aliasing, present and
/// FSR shader programs, the two present samplers, the quad vertex buffer, the initial 1x1 screen
/// texture, the anti-aliasing framebuffer and the SMAA lookup textures.
void RendererOpenGL::InitOpenGLObjects() {
    // FXAA programs
    fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
    fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);

    // Replaces the first `#include "<name>"` directive found in `source` with `content`.
    // The directive is asserted to be present.
    const auto substitute_include = [](std::string& source, std::string_view name,
                                       std::string_view content) {
        const std::string directive = fmt::format("#include \"{}\"", name);
        const std::size_t where = source.find(directive);
        ASSERT(where != std::string::npos);
        source.replace(where, directive.size(), content);
    };

    // Builds an SMAA stage after inlining the shared SMAA GLSL into its source.
    const auto compile_smaa = [&](std::string_view stage_source, GLenum stage) {
        std::string source{stage_source};
        substitute_include(source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL);
        return CreateProgram(source, stage);
    };

    smaa_edge_detection_vert =
        compile_smaa(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER);
    smaa_edge_detection_frag =
        compile_smaa(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER);
    smaa_blending_weight_calculation_vert =
        compile_smaa(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER);
    smaa_blending_weight_calculation_frag =
        compile_smaa(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER);
    smaa_neighborhood_blending_vert =
        compile_smaa(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER);
    smaa_neighborhood_blending_frag =
        compile_smaa(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER);

    // Present programs, one fragment program per scaling filter
    present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
    present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
    present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER);
    present_gaussian_fragment =
        CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
    present_scaleforce_fragment =
        CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
                      GL_FRAGMENT_SHADER);

    // FSR: resolve the FidelityFX headers into the common source, then into both passes
    std::string fsr_common{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG};
    substitute_include(fsr_common, "ffx_a.h", HostShaders::FFX_A_H);
    substitute_include(fsr_common, "ffx_fsr1.h", HostShaders::FFX_FSR1_H);

    std::string easu_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG};
    std::string rcas_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG};
    substitute_include(easu_source, "opengl_fidelityfx_fsr.frag", fsr_common);
    substitute_include(rcas_source, "opengl_fidelityfx_fsr.frag", fsr_common);

    fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, easu_source, rcas_source);

    // Creates a sampler that clamps to edge on every axis and uses `filter` for min and mag.
    const auto create_clamped_sampler = [](OGLSampler& sampler, GLint filter) {
        sampler.Create();
        glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, filter);
        glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, filter);
        glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
    };
    create_clamped_sampler(present_sampler, GL_LINEAR);
    create_clamped_sampler(present_sampler_nn, GL_NEAREST);

    // Vertex buffer with room for the four vertices of the screen quad
    vertex_buffer.Create();
    glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);

    // Start with a 1x1 screen texture and present from it
    OGLTexture& screen_texture = screen_info.texture.resource;
    screen_texture.Create(GL_TEXTURE_2D);
    glTextureStorage2D(screen_texture.handle, 1, GL_RGBA8, 1, 1);
    screen_info.display_texture = screen_texture.handle;

    // Clear screen to black
    LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);

    aa_framebuffer.Create();

    // SMAA lookup textures, filled from the precomputed tables
    smaa_area_tex.Create(GL_TEXTURE_2D);
    glTextureStorage2D(smaa_area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT);
    glTextureSubImage2D(smaa_area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG,
                        GL_UNSIGNED_BYTE, areaTexBytes);

    smaa_search_tex.Create(GL_TEXTURE_2D);
    glTextureStorage2D(smaa_search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT);
    glTextureSubImage2D(smaa_search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED,
                        GL_UNSIGNED_BYTE, searchTexBytes);
}
void RendererOpenGL::AddTelemetryFields() { void RendererOpenGL::AddTelemetryFields() {
const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
@ -380,328 +159,7 @@ void RendererOpenGL::AddTelemetryFields() {
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
} }
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) {
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
texture.height = framebuffer.height;
texture.pixel_format = framebuffer.pixel_format;
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
GLint internal_format;
switch (framebuffer.pixel_format) {
case Service::android::PixelFormat::Rgba8888:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
break;
case Service::android::PixelFormat::Rgb565:
internal_format = GL_RGB565;
texture.gl_format = GL_RGB;
texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
break;
default:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
// UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
// static_cast<u32>(framebuffer.pixel_format));
break;
}
texture.resource.Release();
texture.resource.Create(GL_TEXTURE_2D);
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
aa_texture.Release();
aa_texture.Create(GL_TEXTURE_2D);
glTextureStorage2D(aa_texture.handle, 1, GL_RGBA16F,
Settings::values.resolution_info.ScaleUp(screen_info.texture.width),
Settings::values.resolution_info.ScaleUp(screen_info.texture.height));
glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0);
smaa_edges_tex.Release();
smaa_edges_tex.Create(GL_TEXTURE_2D);
glTextureStorage2D(smaa_edges_tex.handle, 1, GL_RG16F,
Settings::values.resolution_info.ScaleUp(screen_info.texture.width),
Settings::values.resolution_info.ScaleUp(screen_info.texture.height));
smaa_blend_tex.Release();
smaa_blend_tex.Create(GL_TEXTURE_2D);
glTextureStorage2D(smaa_blend_tex.handle, 1, GL_RGBA16F,
Settings::values.resolution_info.ScaleUp(screen_info.texture.width),
Settings::values.resolution_info.ScaleUp(screen_info.texture.height));
}
/**
 * Draws the current screen texture into the bound draw framebuffer using the given layout.
 *
 * In order: the state tracker is notified about every piece of GL state touched here, a fixed
 * pipeline state is forced, an optional anti-aliasing pass (FXAA or SMAA) renders into
 * aa_texture, an optional FSR pass runs, and finally the target is cleared to the background
 * color and a single textured quad covering layout.screen is drawn with the fragment program
 * matching the selected scaling filter.
 *
 * @param layout Window layout; width/height give the viewport and `screen` the quad rectangle.
 */
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
state_tracker.NotifyCullTest();
state_tracker.NotifyDepthTest();
state_tracker.NotifyStencilTest();
state_tracker.NotifyPolygonOffset();
state_tracker.NotifyRasterizeEnable();
state_tracker.NotifyFramebufferSRGB();
state_tracker.NotifyLogicOp();
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
// Force the fixed pipeline state used for presentation
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
glDisable(GL_POLYGON_OFFSET_FILL);
glDisable(GL_RASTERIZER_DISCARD);
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthRangeIndexed(0, 0.0, 0.0);
glBindTextureUnit(0, screen_info.display_texture);
// Fall back to no anti-aliasing (and persist that) when the setting holds an invalid value
auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) {
LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing);
anti_aliasing = Settings::AntiAliasing::None;
Settings::values.anti_aliasing.SetValue(anti_aliasing);
}
// Anti-aliasing pass: renders into aa_framebuffer and leaves aa_texture bound on unit 0
if (anti_aliasing != Settings::AntiAliasing::None) {
glEnablei(GL_SCISSOR_TEST, 0);
// Viewport covers the whole texture; the scissor covers the crop rectangle, or the whole
// texture when the crop rectangle has no extent
auto viewport_width = screen_info.texture.width;
auto scissor_width = framebuffer_crop_rect.GetWidth();
if (scissor_width <= 0) {
scissor_width = viewport_width;
}
auto viewport_height = screen_info.texture.height;
auto scissor_height = framebuffer_crop_rect.GetHeight();
if (scissor_height <= 0) {
scissor_height = viewport_height;
}
// Accelerated displays have their extents scaled up by the resolution setting
if (screen_info.was_accelerated) {
viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width);
scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width);
viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height);
scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height);
}
glScissorIndexed(0, 0, 0, scissor_width, scissor_height);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width),
static_cast<GLfloat>(viewport_height));
glBindSampler(0, present_sampler.handle);
// Save the framebuffer bindings so they can be restored after the pass
GLint old_read_fb;
GLint old_draw_fb;
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
switch (anti_aliasing) {
case Settings::AntiAliasing::Fxaa: {
program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
} break;
case Settings::AntiAliasing::Smaa: {
// SMAA runs three draws, re-targeting aa_framebuffer's color attachment for each:
// edge detection -> smaa_edges_tex, blending weights -> smaa_blend_tex,
// neighborhood blending -> aa_texture
glClearColor(0, 0, 0, 0);
glFrontFace(GL_CCW);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle);
glBindSampler(1, present_sampler.handle);
glBindSampler(2, present_sampler.handle);
glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0,
smaa_edges_tex.handle, 0);
glClear(GL_COLOR_BUFFER_BIT);
program_manager.BindPresentPrograms(smaa_edge_detection_vert.handle,
smaa_edge_detection_frag.handle);
glDrawArrays(GL_TRIANGLES, 0, 3);
glBindTextureUnit(0, smaa_edges_tex.handle);
glBindTextureUnit(1, smaa_area_tex.handle);
glBindTextureUnit(2, smaa_search_tex.handle);
glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0,
smaa_blend_tex.handle, 0);
glClear(GL_COLOR_BUFFER_BIT);
program_manager.BindPresentPrograms(smaa_blending_weight_calculation_vert.handle,
smaa_blending_weight_calculation_frag.handle);
glDrawArrays(GL_TRIANGLES, 0, 3);
glBindTextureUnit(0, screen_info.display_texture);
glBindTextureUnit(1, smaa_blend_tex.handle);
glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0,
aa_texture.handle, 0);
program_manager.BindPresentPrograms(smaa_neighborhood_blending_vert.handle,
smaa_neighborhood_blending_frag.handle);
glDrawArrays(GL_TRIANGLES, 0, 3);
// Restore the winding order set at the top of this function
glFrontFace(GL_CW);
} break;
default:
UNREACHABLE();
}
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
// Later stages sample the anti-aliased result
glBindTextureUnit(0, aa_texture.handle);
}
glDisablei(GL_SCISSOR_TEST, 0);
// FSR pass: buffers are created on demand and released again once FSR is deselected
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
if (!fsr->AreBuffersInitialized()) {
fsr->InitBuffers();
}
// An empty crop extent means "use the full framebuffer extent"
auto crop_rect = framebuffer_crop_rect;
if (crop_rect.GetWidth() == 0) {
crop_rect.right = framebuffer_width;
}
if (crop_rect.GetHeight() == 0) {
crop_rect.bottom = framebuffer_height;
}
crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width);
const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height);
glBindSampler(0, present_sampler.handle);
fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect);
} else {
if (fsr->AreBuffersInitialized()) {
fsr->ReleaseBuffers();
}
}
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
// Pick the fragment program for the selected scaling filter; nearest-neighbor shares the
// bilinear program, and unknown values fall back to bilinear as well
const auto fragment_handle = [this]() {
switch (Settings::values.scaling_filter.GetValue()) {
case Settings::ScalingFilter::NearestNeighbor:
case Settings::ScalingFilter::Bilinear:
return present_bilinear_fragment.handle;
case Settings::ScalingFilter::Bicubic:
return present_bicubic_fragment.handle;
case Settings::ScalingFilter::Gaussian:
return present_gaussian_fragment.handle;
case Settings::ScalingFilter::ScaleForce:
return present_scaleforce_fragment.handle;
case Settings::ScalingFilter::Fsr:
return fsr->GetPresentFragmentProgram().handle;
default:
return present_bilinear_fragment.handle;
}
}();
program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle);
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
ortho_matrix.data());
// Note: below, texcoords.top/bottom feed the U coordinate and left/right feed the V coordinate
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
auto right = texcoords.right;
if (framebuffer_transform_flags != Service::android::BufferTransformFlags::Unset) {
if (framebuffer_transform_flags == Service::android::BufferTransformFlags::FlipV) {
// Flip the framebuffer vertically
left = texcoords.right;
right = texcoords.left;
} else {
// Other transformations are unsupported
LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
framebuffer_transform_flags);
UNIMPLEMENTED();
}
}
ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");
// Offset added to the V coordinate when the crop rectangle does not start at the top
f32 left_start{};
if (framebuffer_crop_rect.Top() > 0) {
left_start = static_cast<f32>(framebuffer_crop_rect.Top()) /
static_cast<f32>(framebuffer_crop_rect.Bottom());
}
f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
f32 scale_v =
static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) {
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.texture.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.texture.height);
}
}
// With FXAA on a non-accelerated display the scales are divided by the resolution factor
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
!screen_info.was_accelerated) {
scale_u /= Settings::values.resolution_info.up_factor;
scale_v /= Settings::values.resolution_info.up_factor;
}
// Quad corners in triangle-strip order: top-left, top-right, bottom-left, bottom-right
const auto& screen = layout.screen;
const std::array vertices = {
ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u,
left_start + left * scale_v),
ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u,
left_start + left * scale_v),
ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u,
left_start + right * scale_v),
ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u,
left_start + right * scale_v),
};
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
glDisable(GL_FRAMEBUFFER_SRGB);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
// Describe the position and texture-coordinate attributes, both read from vertex binding 0
glEnableVertexAttribArray(PositionLocation);
glEnableVertexAttribArray(TexCoordLocation);
glVertexAttribDivisor(PositionLocation, 0);
glVertexAttribDivisor(TexCoordLocation, 0);
glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
glVertexAttribBinding(PositionLocation, 0);
glVertexAttribBinding(TexCoordLocation, 0);
// With unified vertex buffer memory the buffer is addressed by its GPU address instead of
// being bound by handle
if (device.HasVertexBufferUnifiedMemory()) {
glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
sizeof(vertices));
} else {
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
}
// Only the nearest-neighbor filter uses the nearest sampler; every other filter samples linearly
if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
glBindSampler(0, present_sampler.handle);
} else {
glBindSampler(0, present_sampler_nn.handle);
}
// Update background color before drawing
glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
Settings::values.bg_green.GetValue() / 255.0f,
Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// TODO
// program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {
if (!renderer_settings.screenshot_requested) { if (!renderer_settings.screenshot_requested) {
return; return;
} }
@ -723,7 +181,7 @@ void RendererOpenGL::RenderScreenshot() {
glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
DrawScreen(layout); blit_screen->DrawScreen(framebuffers, layout);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glPixelStorei(GL_PACK_ROW_LENGTH, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0);

View file

@ -10,7 +10,6 @@
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
@ -25,37 +24,13 @@ namespace Core::Frontend {
class EmuWindow; class EmuWindow;
} }
namespace Core::Memory {
class Memory;
}
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra { namespace Tegra {
class GPU; class GPU;
} }
namespace OpenGL { namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen class BlitScreen;
// Backing texture for the presented screen together with the parameters it was created with.
// Every scalar member is value-initialized so that comparing against an incoming framebuffer
// before the texture has been configured reads well-defined (zero) values.
struct TextureInfo {
    OGLTexture resource;                            // Owning texture object
    GLsizei width{};                                // Width the texture was allocated with
    GLsizei height{};                               // Height the texture was allocated with
    GLenum gl_format{};                             // Pixel transfer format used for uploads
    GLenum gl_type{};                               // Pixel transfer type used for uploads
    Service::android::PixelFormat pixel_format{};   // Guest pixel format of the framebuffer
};
/// Describes what is presented for the Switch screen: which texture is displayed, whether it
/// came from the accelerated path, the texture coordinates to sample, and the backing texture
struct ScreenInfo {
    GLuint display_texture = 0;
    bool was_accelerated{false};
    const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
    TextureInfo texture;
};
class RendererOpenGL final : public VideoCore::RendererBase { class RendererOpenGL final : public VideoCore::RendererBase {
public: public:
@ -65,7 +40,7 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_); std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override; ~RendererOpenGL() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override;
VideoCore::RasterizerInterface* ReadRasterizer() override { VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer; return &rasterizer;
@ -76,28 +51,8 @@ public:
} }
private: private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields(); void AddTelemetryFields();
void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
/// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
/// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
/// can be 1x1 but will stretch across whatever it's rendered on.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
Core::TelemetrySession& telemetry_session; Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window; Core::Frontend::EmuWindow& emu_window;
@ -108,49 +63,9 @@ private:
StateTracker state_tracker; StateTracker state_tracker;
ProgramManager program_manager; ProgramManager program_manager;
RasterizerOpenGL rasterizer; RasterizerOpenGL rasterizer;
// OpenGL object IDs
OGLSampler present_sampler;
OGLSampler present_sampler_nn;
OGLBuffer vertex_buffer;
OGLProgram fxaa_vertex;
OGLProgram fxaa_fragment;
OGLProgram present_vertex;
OGLProgram present_bilinear_fragment;
OGLProgram present_bicubic_fragment;
OGLProgram present_gaussian_fragment;
OGLProgram present_scaleforce_fragment;
OGLFramebuffer screenshot_framebuffer; OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer std::unique_ptr<BlitScreen> blit_screen;
GLuint64EXT vertex_buffer_address = 0;
/// Display information for Switch screen
ScreenInfo screen_info;
OGLTexture aa_texture;
OGLFramebuffer aa_framebuffer;
OGLProgram smaa_edge_detection_vert;
OGLProgram smaa_blending_weight_calculation_vert;
OGLProgram smaa_neighborhood_blending_vert;
OGLProgram smaa_edge_detection_frag;
OGLProgram smaa_blending_weight_calculation_frag;
OGLProgram smaa_neighborhood_blending_frag;
OGLTexture smaa_area_tex;
OGLTexture smaa_search_tex;
OGLTexture smaa_edges_tex;
OGLTexture smaa_blend_tex;
std::unique_ptr<FSR> fsr;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
Service::android::BufferTransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
u32 framebuffer_width;
u32 framebuffer_height;
}; };
} // namespace OpenGL } // namespace OpenGL

View file

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Scheduler;
/// Abstract interface for an anti-aliasing stage applied to an image/view pair.
class AntiAliasPass {
public:
virtual ~AntiAliasPass() = default;
/// Runs the pass for one per-frame resource slot.
/// @param scheduler Scheduler handed to the implementation for recording its work.
/// @param image_index Index of the resource slot the implementation should use.
/// @param inout_image In/out image handle; an implementation may overwrite it with its output.
/// @param inout_image_view In/out image view handle; an implementation may overwrite it with
/// its output.
virtual void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image,
VkImageView* inout_image_view) = 0;
};
/// Anti-aliasing pass that does nothing: Draw has an empty body, so the image and view handles
/// passed in are left exactly as the caller provided them.
class NoAA final : public AntiAliasPass {
public:
void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image,
VkImageView* inout_image_view) override {}
};
} // namespace Vulkan

View file

@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/common_types.h"
#include "video_core/host_shaders/present_bicubic_frag_spv.h"
#include "video_core/host_shaders/present_gaussian_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
#include "video_core/renderer_vulkan/present/filters.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
namespace {
/// Builds the ScaleForce fragment shader module.
/// The FP16 SPIR-V is used when the device reports float16 support; otherwise the FP32 SPIR-V
/// is used.
vk::ShaderModule SelectScaleForceShader(const Device& device) {
    if (!device.IsFloat16Supported()) {
        return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
    }
    return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
}
} // Anonymous namespace
/// Creates a window-adapt pass that pairs a nearest-neighbor sampler with the default present
/// fragment shader.
std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device, VkFormat frame_format) {
    auto sampler = CreateNearestNeighborSampler(device);
    auto fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
    return std::make_unique<WindowAdaptPass>(device, frame_format, std::move(sampler),
                                             std::move(fragment_shader));
}
/// Creates a window-adapt pass that pairs a bilinear sampler with the default present fragment
/// shader.
std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device, VkFormat frame_format) {
    auto sampler = CreateBilinearSampler(device);
    auto fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
    return std::make_unique<WindowAdaptPass>(device, frame_format, std::move(sampler),
                                             std::move(fragment_shader));
}
/// Creates a window-adapt pass that pairs a bilinear sampler with the bicubic present fragment
/// shader.
std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device, VkFormat frame_format) {
    auto sampler = CreateBilinearSampler(device);
    auto fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
    return std::make_unique<WindowAdaptPass>(device, frame_format, std::move(sampler),
                                             std::move(fragment_shader));
}
/// Creates a window-adapt pass that pairs a bilinear sampler with the gaussian present fragment
/// shader.
std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device, VkFormat frame_format) {
    auto sampler = CreateBilinearSampler(device);
    auto fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
    return std::make_unique<WindowAdaptPass>(device, frame_format, std::move(sampler),
                                             std::move(fragment_shader));
}
/// Creates a window-adapt pass that pairs a bilinear sampler with the ScaleForce fragment shader
/// chosen by SelectScaleForceShader for this device.
std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device, VkFormat frame_format) {
    auto sampler = CreateBilinearSampler(device);
    auto fragment_shader = SelectScaleForceShader(device);
    return std::make_unique<WindowAdaptPass>(device, frame_format, std::move(sampler),
                                             std::move(fragment_shader));
}
} // namespace Vulkan

View file

@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/renderer_vulkan/present/window_adapt_pass.h"
namespace Vulkan {
class MemoryAllocator;
// Factory functions for the window-adapt (final scaling) pass. Each takes the device and the
// format of the frame being rendered to, and returns a newly created WindowAdaptPass.

/// Nearest-neighbor sampling with the default present fragment shader.
std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device, VkFormat frame_format);
/// Bilinear sampling with the default present fragment shader.
std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device, VkFormat frame_format);
/// Bilinear sampling with the bicubic present fragment shader.
std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device, VkFormat frame_format);
/// Bilinear sampling with the gaussian present fragment shader.
std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device, VkFormat frame_format);
/// Bilinear sampling with the ScaleForce fragment shader (FP16 or FP32 variant per device).
std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device, VkFormat frame_format);
} // namespace Vulkan

View file

@ -0,0 +1,226 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/settings.h"
#include "video_core/fsr.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_frag_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_frag_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_frag_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_frag_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_vert_spv.h"
#include "video_core/renderer_vulkan/present/fsr.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
using namespace FSR;
using PushConstants = std::array<u32, 4 * 4>;
/// Stores the construction parameters and eagerly creates every object the pass uses.
/// @param device Device the objects are created on.
/// @param memory_allocator Allocator used for the intermediate images.
/// @param image_count Number of per-frame resource slots to create.
/// @param extent Size of the intermediate EASU/RCAS images and framebuffers.
FSR::FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
         VkExtent2D extent)
    : m_device(device), m_memory_allocator(memory_allocator), m_image_count(image_count),
      m_extent(extent) {
    // Later steps consume what earlier steps produce (framebuffers need image views, descriptor
    // sets need the pool and layout, pipelines need the layout and render pass), so the order
    // below must be kept.
    CreateImages();
    CreateRenderPasses();
    CreateSampler();
    CreateShaders();
    CreateDescriptorPool();
    CreateDescriptorSetLayout();
    CreateDescriptorSets();
    CreatePipelineLayouts();
    CreatePipelines();
}
void FSR::CreateImages() {
m_dynamic_images.resize(m_image_count);
for (auto& images : m_dynamic_images) {
images.images[Easu] =
CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.images[Rcas] =
CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Easu] =
CreateWrappedImageView(m_device, images.images[Easu], VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Rcas] =
CreateWrappedImageView(m_device, images.images[Rcas], VK_FORMAT_R16G16B16A16_SFLOAT);
}
}
void FSR::CreateRenderPasses() {
m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
for (auto& images : m_dynamic_images) {
images.framebuffers[Easu] =
CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Easu], m_extent);
images.framebuffers[Rcas] =
CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Rcas], m_extent);
}
}
/// Creates the bilinear sampler stored in m_sampler; the same sampler is written into the
/// descriptor sets of both the EASU and RCAS stages.
void FSR::CreateSampler() {
m_sampler = CreateBilinearSampler(m_device);
}
/// Builds the shared vertex shader and the EASU/RCAS fragment shaders.
/// The FP16 fragment shaders are used when the device reports float16 support, the FP32 ones
/// otherwise.
void FSR::CreateShaders() {
    m_vert_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_VERT_SPV);

    if (!m_device.IsFloat16Supported()) {
        m_easu_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_EASU_FP32_FRAG_SPV);
        m_rcas_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_FRAG_SPV);
        return;
    }

    m_easu_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_EASU_FP16_FRAG_SPV);
    m_rcas_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_FRAG_SPV);
}
/// Creates the descriptor pool backing every slot's EASU and RCAS descriptor sets.
void FSR::CreateDescriptorPool() {
    // Each slot needs one descriptor for EASU and one for RCAS, held in two descriptor sets, so
    // both pool limits are twice the number of slots.
    const size_t per_stage_total = 2 * m_image_count;
    m_descriptor_pool = CreateWrappedDescriptorPool(m_device, per_stage_total, per_stage_total);
}
/// Creates the descriptor set layout shared by both stages: a single combined image sampler.
void FSR::CreateDescriptorSetLayout() {
m_descriptor_set_layout =
CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
}
void FSR::CreateDescriptorSets() {
std::vector<VkDescriptorSetLayout> layouts(MaxFsrStage, *m_descriptor_set_layout);
for (auto& images : m_dynamic_images) {
images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts);
}
}
void FSR::CreatePipelineLayouts() {
const VkPushConstantRange range{
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.offset = 0,
.size = sizeof(PushConstants),
};
VkPipelineLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = m_descriptor_set_layout.address(),
.pushConstantRangeCount = 1,
.pPushConstantRanges = &range,
};
m_pipeline_layout = m_device.GetLogical().CreatePipelineLayout(ci);
}
void FSR::CreatePipelines() {
m_easu_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout,
std::tie(m_vert_shader, m_easu_shader));
m_rcas_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout,
std::tie(m_vert_shader, m_rcas_shader));
}
/// Points the slot's descriptor sets at their inputs: EASU samples the supplied source view,
/// RCAS samples the EASU output view of the same slot.
/// @param image_view View of the image the EASU stage reads.
/// @param image_index Resource slot whose descriptor sets are rewritten.
void FSR::UpdateDescriptorSets(VkImageView image_view, size_t image_index) {
    Images& slot = m_dynamic_images[image_index];
    const VkSampler sampler = *m_sampler;
    const VkImageView easu_output_view = *slot.image_views[Easu];

    // Capacity is reserved before any write is built so the vector does not reallocate while
    // entries are appended. NOTE(review): presumably CreateWriteDescriptorSet stores a pointer
    // into this vector - confirm against its definition.
    std::vector<VkDescriptorImageInfo> infos;
    infos.reserve(2);

    std::vector<VkWriteDescriptorSet> writes;
    writes.push_back(
        CreateWriteDescriptorSet(infos, sampler, image_view, slot.descriptor_sets[Easu], 0));
    writes.push_back(
        CreateWriteDescriptorSet(infos, sampler, easu_output_view, slot.descriptor_sets[Rcas], 0));

    m_device.GetLogical().UpdateDescriptorSets(writes, {});
}
/// Clears every intermediate image once, the first time the pass is used.
/// Later calls do nothing because m_images_ready is set after the first clear.
void FSR::UploadImages(Scheduler& scheduler) {
    if (!m_images_ready) {
        scheduler.Record([this](vk::CommandBuffer cmdbuf) {
            for (Images& slot : m_dynamic_images) {
                ClearColorImage(cmdbuf, *slot.images[Easu]);
                ClearColorImage(cmdbuf, *slot.images[Rcas]);
            }
        });
        // Finish() is called right after recording and before the flag is set.
        scheduler.Finish();
        m_images_ready = true;
    }
}
/// Records the two FSR stages (EASU, then RCAS) for one resource slot.
/// @param scheduler Scheduler the commands are recorded on.
/// @param image_index Resource slot whose images, framebuffers and descriptor sets are used.
/// @param source_image Image read by the EASU stage; transitioned before the first render pass.
/// @param source_image_view View of source_image bound to the EASU descriptor set.
/// @param input_image_extent Size of the source image in pixels.
/// @param crop_rect Crop rectangle; multiplied by the input extent below to get the viewport
/// handed to the EASU constants.
/// @return View of this slot's RCAS output image.
VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image,
VkImageView source_image_view, VkExtent2D input_image_extent,
const Common::Rectangle<f32>& crop_rect) {
Images& images = m_dynamic_images[image_index];
// Copy out raw handles so the recorded lambda can capture them by value.
VkImage easu_image = *images.images[Easu];
VkImage rcas_image = *images.images[Rcas];
VkDescriptorSet easu_descriptor_set = images.descriptor_sets[Easu];
VkDescriptorSet rcas_descriptor_set = images.descriptor_sets[Rcas];
VkFramebuffer easu_framebuffer = *images.framebuffers[Easu];
VkFramebuffer rcas_framebuffer = *images.framebuffers[Rcas];
VkPipeline easu_pipeline = *m_easu_pipeline;
VkPipeline rcas_pipeline = *m_rcas_pipeline;
VkPipelineLayout pipeline_layout = *m_pipeline_layout;
VkRenderPass renderpass = *m_renderpass;
VkExtent2D extent = m_extent;
const f32 input_image_width = static_cast<f32>(input_image_extent.width);
const f32 input_image_height = static_cast<f32>(input_image_extent.height);
const f32 output_image_width = static_cast<f32>(extent.width);
const f32 output_image_height = static_cast<f32>(extent.height);
// Scale the crop rectangle by the input size to get the viewport origin and size.
const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width;
const f32 viewport_x = crop_rect.left * input_image_width;
const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height;
const f32 viewport_y = crop_rect.top * input_image_height;
PushConstants easu_con{};
PushConstants rcas_con{};
// The EASU helper fills four consecutive groups of four u32 values in the push constant block.
FsrEasuConOffset(easu_con.data() + 0, easu_con.data() + 4, easu_con.data() + 8,
easu_con.data() + 12, viewport_width, viewport_height, input_image_width,
input_image_height, output_image_width, output_image_height, viewport_x,
viewport_y);
// The sharpening setting is divided by 100 before being passed to the RCAS constants.
const float sharpening =
static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
FsrRcasCon(rcas_con.data(), sharpening);
UploadImages(scheduler);
UpdateDescriptorSets(source_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=](vk::CommandBuffer cmdbuf) {
// Stage 1: EASU draws a 3-vertex primitive into the EASU framebuffer.
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, easu_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, renderpass, easu_framebuffer, extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, easu_pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0,
easu_descriptor_set, {});
cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, easu_con);
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
// Stage 2: RCAS reads the EASU output and draws into the RCAS framebuffer.
TransitionImageLayout(cmdbuf, easu_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, rcas_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, renderpass, rcas_framebuffer, extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, rcas_pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0,
rcas_descriptor_set, {});
cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, rcas_con);
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
TransitionImageLayout(cmdbuf, rcas_image, VK_IMAGE_LAYOUT_GENERAL);
});
return *images.image_views[Rcas];
}
} // namespace Vulkan

View file

@ -0,0 +1,69 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/math_util.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class Scheduler;
/// Two-stage FSR pass (EASU followed by RCAS) that owns one set of intermediate images,
/// framebuffers and descriptor sets per resource slot.
class FSR {
public:
/// Creates all resources up front.
/// @param image_count Number of per-frame resource slots.
/// @param extent Size of the intermediate images the stages render into.
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
VkExtent2D extent);
/// Records both stages for the given slot and returns a view of the RCAS output image.
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image,
VkImageView source_image_view, VkExtent2D input_image_extent,
const Common::Rectangle<f32>& crop_rect);
private:
// One-time setup steps, invoked by the constructor in this order.
void CreateImages();
void CreateRenderPasses();
void CreateSampler();
void CreateShaders();
void CreateDescriptorPool();
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayouts();
void CreatePipelines();
/// Clears the intermediate images the first time the pass is drawn.
void UploadImages(Scheduler& scheduler);
/// Rebinds the slot's descriptor sets to the given source view and the EASU output view.
void UpdateDescriptorSets(VkImageView image_view, size_t image_index);
const Device& m_device;
MemoryAllocator& m_memory_allocator;
const size_t m_image_count;
const VkExtent2D m_extent;
// Stage indices into the per-slot arrays below; MaxFsrStage is the number of stages.
enum FsrStage {
Easu,
Rcas,
MaxFsrStage,
};
vk::DescriptorPool m_descriptor_pool;
vk::DescriptorSetLayout m_descriptor_set_layout;
vk::PipelineLayout m_pipeline_layout;
vk::ShaderModule m_vert_shader;
vk::ShaderModule m_easu_shader;
vk::ShaderModule m_rcas_shader;
vk::Pipeline m_easu_pipeline;
vk::Pipeline m_rcas_pipeline;
vk::RenderPass m_renderpass;
vk::Sampler m_sampler;
// Per-slot resources, indexed by FsrStage inside each array.
struct Images {
vk::DescriptorSets descriptor_sets;
std::array<vk::Image, MaxFsrStage> images;
std::array<vk::ImageView, MaxFsrStage> image_views;
std::array<vk::Framebuffer, MaxFsrStage> framebuffers;
};
std::vector<Images> m_dynamic_images;
// Set once the intermediate images have been cleared by UploadImages.
bool m_images_ready{};
};
} // namespace Vulkan

View file

@ -0,0 +1,148 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/common_types.h"
#include "video_core/host_shaders/fxaa_frag_spv.h"
#include "video_core/host_shaders/fxaa_vert_spv.h"
#include "video_core/renderer_vulkan/present/fxaa.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
FXAA::FXAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent)
: m_device(device), m_allocator(allocator), m_extent(extent),
m_image_count(static_cast<u32>(image_count)) {
CreateImages();
CreateRenderPasses();
CreateSampler();
CreateShaders();
CreateDescriptorPool();
CreateDescriptorSetLayouts();
CreateDescriptorSets();
CreatePipelineLayouts();
CreatePipelines();
}
/// Defaulted destructor, defined out of line; cleanup is left to the members' own destructors.
FXAA::~FXAA() = default;
void FXAA::CreateImages() {
for (u32 i = 0; i < m_image_count; i++) {
Image& image = m_dynamic_images.emplace_back();
image.image = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
image.image_view =
CreateWrappedImageView(m_device, image.image, VK_FORMAT_R16G16B16A16_SFLOAT);
}
}
void FXAA::CreateRenderPasses() {
m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
for (auto& image : m_dynamic_images) {
image.framebuffer =
CreateWrappedFramebuffer(m_device, m_renderpass, image.image_view, m_extent);
}
}
/// Creates the sampler stored in m_sampler; it is written into both descriptor bindings when
/// the descriptor sets are updated.
void FXAA::CreateSampler() {
m_sampler = CreateWrappedSampler(m_device);
}
/// Creates the FXAA vertex and fragment shader modules from their embedded SPIR-V.
void FXAA::CreateShaders() {
m_vertex_shader = CreateWrappedShaderModule(m_device, FXAA_VERT_SPV);
m_fragment_shader = CreateWrappedShaderModule(m_device, FXAA_FRAG_SPV);
}
void FXAA::CreateDescriptorPool() {
// 2 descriptors, 1 descriptor set per image
m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 2 * m_image_count, m_image_count);
}
/// Creates the descriptor set layout used by the pass: two combined image sampler bindings.
void FXAA::CreateDescriptorSetLayouts() {
m_descriptor_set_layout =
CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
}
void FXAA::CreateDescriptorSets() {
VkDescriptorSetLayout layout = *m_descriptor_set_layout;
for (auto& images : m_dynamic_images) {
images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, {layout});
}
}
/// Creates the pipeline layout from the pass's descriptor set layout.
void FXAA::CreatePipelineLayouts() {
m_pipeline_layout = CreateWrappedPipelineLayout(m_device, m_descriptor_set_layout);
}
/// Creates the graphics pipeline from the render pass, pipeline layout and the vertex/fragment
/// shader pair.
void FXAA::CreatePipelines() {
m_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout,
std::tie(m_vertex_shader, m_fragment_shader));
}
/// Binds the supplied source view, with the pass's sampler, to bindings 0 and 1 of the slot's
/// descriptor set.
/// @param image_view View of the image the pass reads.
/// @param image_index Resource slot whose descriptor set is rewritten.
void FXAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) {
    Image& slot = m_dynamic_images[image_index];
    const VkSampler sampler = *m_sampler;
    const VkDescriptorSet target_set = slot.descriptor_sets[0];

    // Capacity is reserved before any write is built so the vector does not reallocate while
    // entries are appended. NOTE(review): presumably CreateWriteDescriptorSet stores a pointer
    // into this vector - confirm against its definition.
    std::vector<VkDescriptorImageInfo> infos;
    infos.reserve(2);

    std::vector<VkWriteDescriptorSet> writes;
    writes.push_back(CreateWriteDescriptorSet(infos, sampler, image_view, target_set, 0));
    writes.push_back(CreateWriteDescriptorSet(infos, sampler, image_view, target_set, 1));

    m_device.GetLogical().UpdateDescriptorSets(writes, {});
}
/// Clears every output image once, the first time the pass is used.
/// Later calls do nothing because m_images_ready is set after the first clear.
void FXAA::UploadImages(Scheduler& scheduler) {
    if (!m_images_ready) {
        scheduler.Record([this](vk::CommandBuffer cmdbuf) {
            for (Image& slot : m_dynamic_images) {
                ClearColorImage(cmdbuf, *slot.image);
            }
        });
        // Finish() is called right after recording and before the flag is set.
        scheduler.Finish();
        m_images_ready = true;
    }
}
/// Records the FXAA pass for one resource slot and redirects the caller's handles to its output.
/// @param scheduler Scheduler the commands are recorded on.
/// @param image_index Resource slot whose image, framebuffer and descriptor set are used.
/// @param inout_image On entry the image to read; on return this slot's output image.
/// @param inout_image_view On entry the view to sample; on return the view of the output image.
void FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image,
VkImageView* inout_image_view) {
const Image& image{m_dynamic_images[image_index]};
// Copy out raw handles so the recorded lambda can capture them by value.
const VkImage input_image{*inout_image};
const VkImage output_image{*image.image};
const VkDescriptorSet descriptor_set{image.descriptor_sets[0]};
const VkFramebuffer framebuffer{*image.framebuffer};
const VkRenderPass renderpass{*m_renderpass};
const VkPipeline pipeline{*m_pipeline};
const VkPipelineLayout layout{*m_pipeline_layout};
const VkExtent2D extent{m_extent};
UploadImages(scheduler);
UpdateDescriptorSets(*inout_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=](vk::CommandBuffer cmdbuf) {
TransitionImageLayout(cmdbuf, input_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, renderpass, framebuffer, extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
// A single 3-vertex draw into the slot's framebuffer.
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
});
// Hand the pass output back to the caller in place of the input.
*inout_image = *image.image;
*inout_image_view = *image.image_view;
}
} // namespace Vulkan

View file

@ -0,0 +1,63 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/renderer_vulkan/present/anti_alias_pass.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class Scheduler;
class StagingBufferPool;
/// FXAA implementation of AntiAliasPass that owns one output image, framebuffer and descriptor
/// set per resource slot.
class FXAA final : public AntiAliasPass {
public:
/// Creates all resources up front.
/// @param image_count Number of per-frame resource slots.
/// @param extent Size of the output images the pass renders into.
explicit FXAA(const Device& device, MemoryAllocator& allocator, size_t image_count,
VkExtent2D extent);
~FXAA() override;
/// Records the pass for the given slot and replaces the in/out handles with its output.
void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image,
VkImageView* inout_image_view) override;
private:
// One-time setup steps, invoked by the constructor in this order.
void CreateImages();
void CreateRenderPasses();
void CreateSampler();
void CreateShaders();
void CreateDescriptorPool();
void CreateDescriptorSetLayouts();
void CreateDescriptorSets();
void CreatePipelineLayouts();
void CreatePipelines();
/// Rebinds the slot's descriptor set to the given source view.
void UpdateDescriptorSets(VkImageView image_view, size_t image_index);
/// Clears the output images the first time the pass is drawn.
void UploadImages(Scheduler& scheduler);
const Device& m_device;
MemoryAllocator& m_allocator;
const VkExtent2D m_extent;
const u32 m_image_count;
vk::ShaderModule m_vertex_shader{};
vk::ShaderModule m_fragment_shader{};
vk::DescriptorPool m_descriptor_pool{};
vk::DescriptorSetLayout m_descriptor_set_layout{};
vk::PipelineLayout m_pipeline_layout{};
vk::Pipeline m_pipeline{};
vk::RenderPass m_renderpass{};
// Per-slot resources.
struct Image {
vk::DescriptorSets descriptor_sets{};
vk::Framebuffer framebuffer{};
vk::Image image{};
vk::ImageView image_view{};
};
std::vector<Image> m_dynamic_images{};
// Set once the output images have been cleared by UploadImages.
bool m_images_ready{};
vk::Sampler m_sampler{};
};
} // namespace Vulkan

View file

@ -0,0 +1,336 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "common/settings.h"
#include "video_core/framebuffer_config.h"
#include "video_core/renderer_vulkan/present/fsr.h"
#include "video_core/renderer_vulkan/present/fxaa.h"
#include "video_core/renderer_vulkan/present/layer.h"
#include "video_core/renderer_vulkan/present/present_push_constants.h"
#include "video_core/renderer_vulkan/present/smaa.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/textures/decoders.h"
namespace Vulkan {
namespace {
/// Returns the bytes per block of the surface pixel format that corresponds to the
/// framebuffer's GPU pixel format.
u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
    const auto surface_format =
        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
    return VideoCore::Surface::BytesPerBlock(surface_format);
}
/// Returns the framebuffer size in bytes, computed as stride * height * bytes-per-pixel.
std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
    const auto stride = static_cast<std::size_t>(framebuffer.stride);
    const auto height = static_cast<std::size_t>(framebuffer.height);
    return stride * height * GetBytesPerPixel(framebuffer);
}
/// Maps the framebuffer's Android pixel format to the Vulkan format used for the raw image.
/// Formats without a mapping are reported through UNIMPLEMENTED_MSG and fall back to
/// VK_FORMAT_A8B8G8R8_UNORM_PACK32.
VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
    using Service::android::PixelFormat;

    const PixelFormat format = framebuffer.pixel_format;
    if (format == PixelFormat::Rgba8888 || format == PixelFormat::Rgbx8888) {
        return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
    }
    if (format == PixelFormat::Rgb565) {
        return VK_FORMAT_R5G6B5_UNORM_PACK16;
    }
    if (format == PixelFormat::Bgra8888) {
        return VK_FORMAT_B8G8R8A8_UNORM;
    }

    UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
                      static_cast<u32>(framebuffer.pixel_format));
    return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
}
} // Anonymous namespace
Layer::Layer(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, size_t image_count_,
VkExtent2D output_size, VkDescriptorSetLayout layout)
: device(device_), memory_allocator(memory_allocator_), scheduler(scheduler_),
device_memory(device_memory_), image_count(image_count_) {
CreateDescriptorPool();
CreateDescriptorSets(layout);
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
CreateFSR(output_size);
}
}
/// Waits on the recorded per-slot ticks and frees the raw images and staging buffer (via
/// ReleaseRawImages) before the remaining members are destroyed.
Layer::~Layer() {
ReleaseRawImages();
}
/// Prepares everything needed to draw this layer for one frame slot.
/// Picks the source image (accelerated texture or CPU-uploaded raw image), runs the
/// anti-aliasing pass and, if present, FSR, then fills the push constants and descriptor set.
/// @param out_push_constants Receives the matrix and vertex data for the draw.
/// @param out_descriptor_set Receives the descriptor set of the slot, bound to the final view.
/// @param rasterizer Queried for an accelerated display texture.
/// @param sampler Sampler written into the descriptor set.
/// @param image_index Resource slot to use.
/// @param framebuffer Guest framebuffer description for this layer.
/// @param layout Window layout used for the matrix and the on-screen rectangle.
void Layer::ConfigureDraw(PresentPushConstants* out_push_constants,
VkDescriptorSet* out_descriptor_set, RasterizerVulkan& rasterizer,
VkSampler sampler, size_t image_index,
const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout& layout) {
const auto texture_info = rasterizer.AccelerateDisplay(
framebuffer, framebuffer.address + framebuffer.offset, framebuffer.stride);
// Without an accelerated texture, fall back to the framebuffer's own dimensions (unscaled).
const u32 texture_width = texture_info ? texture_info->width : framebuffer.width;
const u32 texture_height = texture_info ? texture_info->height : framebuffer.height;
const u32 scaled_width = texture_info ? texture_info->scaled_width : texture_width;
const u32 scaled_height = texture_info ? texture_info->scaled_height : texture_height;
const bool use_accelerated = texture_info.has_value();
RefreshResources(framebuffer);
SetAntiAliasPass();
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
// Wait for the previous use of this slot, and record the tick of this use on scope exit.
scheduler.Wait(resource_ticks[image_index]);
SCOPE_EXIT({ resource_ticks[image_index] = scheduler.CurrentTick(); });
if (!use_accelerated) {
UpdateRawImage(framebuffer, image_index);
}
VkImage source_image = texture_info ? texture_info->image : *raw_images[image_index];
VkImageView source_image_view =
texture_info ? texture_info->image_view : *raw_image_views[image_index];
// The anti-aliasing pass may replace source_image/source_image_view with its own output.
anti_alias->Draw(scheduler, image_index, &source_image, &source_image_view);
auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height);
const VkExtent2D render_extent{
.width = scaled_width,
.height = scaled_height,
};
if (fsr) {
source_image_view = fsr->Draw(scheduler, image_index, source_image, source_image_view,
render_extent, crop_rect);
// FSR already received the crop rectangle, so the final draw samples its whole output.
crop_rect = {0, 0, 1, 1};
}
SetMatrixData(*out_push_constants, layout);
SetVertexData(*out_push_constants, layout, crop_rect);
UpdateDescriptorSet(source_image_view, sampler, image_index);
*out_descriptor_set = descriptor_sets[image_index];
}
/// Creates the layer's descriptor pool, passing image_count for both pool limits.
void Layer::CreateDescriptorPool() {
descriptor_pool = CreateWrappedDescriptorPool(device, image_count, image_count);
}
/// Allocates one descriptor set per resource slot, all using the supplied layout.
void Layer::CreateDescriptorSets(VkDescriptorSetLayout layout) {
    const std::vector<VkDescriptorSetLayout> per_slot_layouts(image_count, layout);
    descriptor_sets = CreateWrappedDescriptorSets(descriptor_pool, per_slot_layouts);
}
/// Creates the upload buffer that holds one linear copy of the framebuffer per resource slot.
void Layer::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
    constexpr VkBufferUsageFlags usage =
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
    const VkDeviceSize size_bytes = CalculateBufferSize(framebuffer);

    const VkBufferCreateInfo create_info{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = size_bytes,
        .usage = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    };

    buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::Upload);
}
/// Sizes the per-slot containers and creates a raw image, with a view, for every slot using the
/// framebuffer's dimensions and its mapped Vulkan format.
void Layer::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
    const VkFormat image_format = GetFormat(framebuffer);

    resource_ticks.resize(image_count);
    raw_images.resize(image_count);
    raw_image_views.resize(image_count);

    for (size_t slot = 0; slot != image_count; ++slot) {
        auto& raw_image = raw_images[slot];
        raw_image = CreateWrappedImage(memory_allocator, {framebuffer.width, framebuffer.height},
                                       image_format);
        raw_image_views[slot] = CreateWrappedImageView(device, raw_image, image_format);
    }
}
/// Creates the layer's FSR pass with image_count slots and the given output extent.
void Layer::CreateFSR(VkExtent2D output_size) {
fsr = std::make_unique<FSR>(device, memory_allocator, image_count, output_size);
}
/// Recreates the staging buffer, raw images and anti-aliasing pass when the framebuffer's
/// width, height or pixel format changes, or when no raw images exist yet.
/// @param framebuffer Guest framebuffer description to match the resources against.
void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
    const bool is_up_to_date = framebuffer.width == raw_width &&
                               framebuffer.height == raw_height &&
                               framebuffer.pixel_format == pixel_format && !raw_images.empty();
    if (is_up_to_date) {
        return;
    }

    raw_width = framebuffer.width;
    raw_height = framebuffer.height;
    pixel_format = framebuffer.pixel_format;

    // ReleaseRawImages() waits on every recorded slot tick before freeing anything. Run it
    // first so the anti-aliasing pass is not destroyed while earlier frames that reference its
    // images, framebuffers and descriptor sets may still be in flight.
    ReleaseRawImages();
    anti_alias.reset();

    CreateStagingBuffer(framebuffer);
    CreateRawImages(framebuffer);
}
void Layer::SetAntiAliasPass() {
if (anti_alias && anti_alias_setting == Settings::values.anti_aliasing.GetValue()) {
return;
}
anti_alias_setting = Settings::values.anti_aliasing.GetValue();
const VkExtent2D render_area{
.width = Settings::values.resolution_info.ScaleUp(raw_width),
.height = Settings::values.resolution_info.ScaleUp(raw_height),
};
switch (anti_alias_setting) {
case Settings::AntiAliasing::Fxaa:
anti_alias = std::make_unique<FXAA>(device, memory_allocator, image_count, render_area);
break;
case Settings::AntiAliasing::Smaa:
anti_alias = std::make_unique<SMAA>(device, memory_allocator, image_count, render_area);
break;
default:
anti_alias = std::make_unique<NoAA>();
break;
}
}
void Layer::ReleaseRawImages() {
for (const u64 tick : resource_ticks) {
scheduler.Wait(tick);
}
raw_images.clear();
buffer.reset();
}
/// Returns the staging buffer size: the linear framebuffer size multiplied by the number of
/// resource slots.
u64 Layer::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
return GetSizeInBytes(framebuffer) * image_count;
}
/// Returns the byte offset of a slot's region inside the staging buffer: the linear framebuffer
/// size multiplied by the slot index.
u64 Layer::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
size_t image_index) const {
return GetSizeInBytes(framebuffer) * image_index;
}
/// Writes an orthographic matrix built from the layout's width and height into the push
/// constants.
void Layer::SetMatrixData(PresentPushConstants& data,
                          const Layout::FramebufferLayout& layout) const {
    const f32 layout_width = static_cast<f32>(layout.width);
    const f32 layout_height = static_cast<f32>(layout.height);
    data.modelview_matrix = MakeOrthographicMatrix(layout_width, layout_height);
}
/// Fills the four quad vertices in the push constants: positions come from the layout's screen
/// rectangle, texture coordinates from the crop rectangle.
void Layer::SetVertexData(PresentPushConstants& data, const Layout::FramebufferLayout& layout,
                          const Common::Rectangle<f32>& crop) const {
    const auto& screen = layout.screen;

    // Screen-space edges of the quad.
    const f32 left = static_cast<f32>(screen.left);
    const f32 top = static_cast<f32>(screen.top);
    const f32 right = left + static_cast<f32>(screen.GetWidth());
    const f32 bottom = top + static_cast<f32>(screen.GetHeight());

    // Vertex order: top-left, top-right, bottom-left, bottom-right.
    data.vertices[0] = ScreenRectVertex(left, top, crop.left, crop.top);
    data.vertices[1] = ScreenRectVertex(right, top, crop.right, crop.top);
    data.vertices[2] = ScreenRectVertex(left, bottom, crop.left, crop.bottom);
    data.vertices[3] = ScreenRectVertex(right, bottom, crop.right, crop.bottom);
}
/// Writes the given view and sampler, as a combined image sampler in GENERAL layout, to binding
/// 0 of the slot's descriptor set.
void Layer::UpdateDescriptorSet(VkImageView image_view, VkSampler sampler, size_t image_index) {
    const VkDescriptorSet target_set = descriptor_sets[image_index];

    const VkDescriptorImageInfo descriptor_image{
        .sampler = sampler,
        .imageView = image_view,
        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
    };

    const VkWriteDescriptorSet write{
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .pNext = nullptr,
        .dstSet = target_set,
        .dstBinding = 0,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
        .pImageInfo = &descriptor_image,
        .pBufferInfo = nullptr,
        .pTexelBufferView = nullptr,
    };

    device.GetLogical().UpdateDescriptorSets(std::array{write}, {});
}
/// Uploads the guest framebuffer into the slot's raw image.
/// The framebuffer is unswizzled on the CPU into the slot's region of the staging buffer, then
/// a buffer-to-image copy with the surrounding layout transitions is recorded.
/// @param framebuffer Guest framebuffer description (address, offset, size, stride, format).
/// @param image_index Resource slot whose staging region and raw image are written.
void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t image_index) {
const std::span<u8> mapped_span = buffer.Mapped();
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
// Both sizes are derived from the stride; the unswizzle call below uses the width.
const u64 linear_size{GetSizeInBytes(framebuffer)};
const u64 tiled_size{Tegra::Texture::CalculateSize(
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
// Copy the slot's staging region into the whole color image (mip 0, layer 0).
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {.x = 0, .y = 0, .z = 0},
.imageExtent =
{
.width = framebuffer.width,
.height = framebuffer.height,
.depth = 1,
},
};
scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[index];
// Template barrier; the two barriers below override the access masks and layouts they need.
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
// Before the copy: UNDEFINED -> TRANSFER_DST_OPTIMAL (previous contents are not preserved).
VkImageMemoryBarrier read_barrier = base_barrier;
read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
read_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
// After the copy: TRANSFER_DST_OPTIMAL -> GENERAL (newLayout inherited from base_barrier),
// making the transfer write visible to shader reads.
VkImageMemoryBarrier write_barrier = base_barrier;
write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
write_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
read_barrier);
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, write_barrier);
});
}
} // namespace Vulkan

View file

@ -0,0 +1,92 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/math_util.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace Service::android {
enum class PixelFormat : u32;
}
namespace Settings {
enum class AntiAliasing : u32;
}
namespace Vulkan {
class AntiAliasPass;
class Device;
class FSR;
class MemoryAllocator;
struct PresentPushConstants;
class RasterizerVulkan;
class Scheduler;
/// One presentation layer of the Vulkan renderer.
///
/// Holds the per-layer Vulkan objects declared below: a descriptor pool and its descriptor sets,
/// a buffer, the "raw" images with their views, and optional anti-aliasing and FSR passes.
class Layer final {
public:
/// Stores the references passed in; `image_count`, `output_size` and `layout` are forwarded to
/// the resource creation helpers.
/// NOTE(review): the constructor body is not visible here - confirm which helpers it invokes.
explicit Layer(const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler,
Tegra::MaxwellDeviceMemoryManager& device_memory, size_t image_count,
VkExtent2D output_size, VkDescriptorSetLayout layout);
~Layer();
/// Prepares this layer for drawing `framebuffer` with the given `layout`.
/// NOTE(review): judging by the `out_` prefixes, the push constants and the descriptor set to
/// bind are written through the two pointers - confirm against the definition.
void ConfigureDraw(PresentPushConstants* out_push_constants,
VkDescriptorSet* out_descriptor_set, RasterizerVulkan& rasterizer,
VkSampler sampler, size_t image_index,
const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout& layout);
private:
// Resource creation / teardown helpers.
void CreateDescriptorPool();
void CreateDescriptorSets(VkDescriptorSetLayout layout);
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void CreateFSR(VkExtent2D output_size);
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void SetAntiAliasPass();
void ReleaseRawImages();
// Size/offset queries for the buffer that backs the raw images.
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, size_t image_index) const;
// Fill in the two halves of PresentPushConstants (matrix and quad vertices).
void SetMatrixData(PresentPushConstants& data, const Layout::FramebufferLayout& layout) const;
void SetVertexData(PresentPushConstants& data, const Layout::FramebufferLayout& layout,
const Common::Rectangle<f32>& crop) const;
/// Points binding 0 of descriptor_sets[image_index] at `image_view` / `sampler`.
void UpdateDescriptorSet(VkImageView image_view, VkSampler sampler, size_t image_index);
/// Unswizzles the guest framebuffer into `buffer` and records a copy into
/// raw_images[image_index].
void UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t image_index);
private:
// Non-owning references supplied at construction.
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
Tegra::MaxwellDeviceMemoryManager& device_memory;
const size_t image_count{};
vk::DescriptorPool descriptor_pool{};
// Indexed by image index; written by UpdateDescriptorSet.
vk::DescriptorSets descriptor_sets{};
// Host-mapped buffer that UpdateRawImage unswizzles into and copies from.
vk::Buffer buffer{};
// Copy destinations of UpdateRawImage, indexed by image index.
std::vector<vk::Image> raw_images{};
std::vector<vk::ImageView> raw_image_views{};
// NOTE(review): presumably the framebuffer dimensions/format the raw images were last created
// for, used to detect when they must be recreated - confirm in RefreshResources.
u32 raw_width{};
u32 raw_height{};
Service::android::PixelFormat pixel_format{};
Settings::AntiAliasing anti_alias_setting{};
// Optional post-processing passes; null until created.
std::unique_ptr<AntiAliasPass> anti_alias{};
std::unique_ptr<FSR> fsr{};
// NOTE(review): looks like per-image scheduler ticks used for synchronisation - confirm.
std::vector<u64> resource_ticks{};
};
} // namespace Vulkan

View file

@ -0,0 +1,34 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_types.h"
namespace Vulkan {
/// One corner of the presentation quad: a 2D position plus the texture coordinate sampled there.
/// Four of these are embedded in PresentPushConstants, so the member order and types define part
/// of that structure's layout.
struct ScreenRectVertex {
// Defaulted: performs no member initialization of its own.
ScreenRectVertex() = default;
/// @param x,y Position of the vertex.
/// @param u,v Texture coordinate associated with the vertex.
explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
std::array<f32, 2> position;
std::array<f32, 2> tex_coord;
};
/// Builds the 4x4 matrix used by the presentation pass for a target of `width` x `height`.
///
/// The 16 values are returned in the order
///   [2/width, 0, 0, 0,  0, 2/height, 0, 0,  0, 0, 1, 0,  -1, -1, 0, 1].
/// NOTE(review): read as column-major this scales [0, width] x [0, height] to [-1, 1] x [-1, 1];
/// confirm the convention against the consuming shader.
static inline std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
    // Start from all zeroes and fill in only the non-zero entries.
    std::array<f32, 4 * 4> matrix{};
    matrix[0] = 2.f / width;
    matrix[5] = 2.f / height;
    matrix[10] = 1.f;
    matrix[12] = -1.f;
    matrix[13] = -1.f;
    matrix[15] = 1.f;
    return matrix;
}
/// Data handed to the presentation shaders as push constants: a 4x4 matrix followed by the four
/// vertices of the quad to draw.
struct PresentPushConstants {
// 16 floats, see MakeOrthographicMatrix for how a matrix of this shape is built.
std::array<f32, 4 * 4> modelview_matrix;
std::array<ScreenRectVertex, 4> vertices;
};
// 128 bytes is the smallest push constant range the Vulkan specification requires every
// implementation to support (minimum maxPushConstantsSize), so staying within it is portable.
static_assert(sizeof(PresentPushConstants) <= 128, "Push constants are too large");
} // namespace Vulkan

View file

@ -0,0 +1,277 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <list>
#include "common/assert.h"
#include "common/polyfill_ranges.h"
#include "video_core/renderer_vulkan/present/smaa.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/smaa_area_tex.h"
#include "video_core/smaa_search_tex.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_frag_spv.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_vert_spv.h"
#include "video_core/host_shaders/smaa_edge_detection_frag_spv.h"
#include "video_core/host_shaders/smaa_edge_detection_vert_spv.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_frag_spv.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_vert_spv.h"
namespace Vulkan {
/// Creates every Vulkan object the SMAA pass needs for `image_count` images of size `extent`.
///
/// The helpers are order dependent: CreateRenderPasses builds framebuffers from the image views
/// made by CreateImages, CreateDescriptorSets needs the pool and the set layouts,
/// CreatePipelineLayouts needs the set layouts, and CreatePipelines needs the render passes,
/// pipeline layouts and shader modules.
SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent)
: m_device(device), m_allocator(allocator), m_extent(extent),
m_image_count(static_cast<u32>(image_count)) {
CreateImages();
CreateRenderPasses();
CreateSampler();
CreateShaders();
CreateDescriptorPool();
CreateDescriptorSetLayouts();
CreateDescriptorSets();
CreatePipelineLayouts();
CreatePipelines();
}
// Nothing to release by hand; members are destroyed by their own destructors.
SMAA::~SMAA() = default;
void SMAA::CreateImages() {
static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM);
m_static_image_views[Area] =
CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM);
m_static_image_views[Search] =
CreateWrappedImageView(m_device, m_static_images[Search], VK_FORMAT_R8_UNORM);
for (u32 i = 0; i < m_image_count; i++) {
Images& images = m_dynamic_images.emplace_back();
images.images[Blend] =
CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
images.images[Output] =
CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Blend] =
CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Edges] =
CreateWrappedImageView(m_device, images.images[Edges], VK_FORMAT_R16G16_SFLOAT);
images.image_views[Output] =
CreateWrappedImageView(m_device, images.images[Output], VK_FORMAT_R16G16B16A16_SFLOAT);
}
}
void SMAA::CreateRenderPasses() {
m_renderpasses[EdgeDetection] = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16_SFLOAT);
m_renderpasses[BlendingWeightCalculation] =
CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
m_renderpasses[NeighborhoodBlending] =
CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
for (auto& images : m_dynamic_images) {
images.framebuffers[EdgeDetection] = CreateWrappedFramebuffer(
m_device, m_renderpasses[EdgeDetection], images.image_views[Edges], m_extent);
images.framebuffers[BlendingWeightCalculation] =
CreateWrappedFramebuffer(m_device, m_renderpasses[BlendingWeightCalculation],
images.image_views[Blend], m_extent);
images.framebuffers[NeighborhoodBlending] = CreateWrappedFramebuffer(
m_device, m_renderpasses[NeighborhoodBlending], images.image_views[Output], m_extent);
}
}
/// Creates the single sampler shared by every SMAA descriptor write (see UpdateDescriptorSets).
/// No filter argument is passed, so the helper's default is used.
void SMAA::CreateSampler() {
m_sampler = CreateWrappedSampler(m_device);
}
/// Builds the vertex and fragment shader modules of every SMAA stage from the embedded SPIR-V.
void SMAA::CreateShaders() {
    // Both tables are indexed by SMAAStage, so their order must follow that enum.
    static constexpr std::array vertex_spirv{
        ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV),
        ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV),
        ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV),
    };
    static constexpr std::array fragment_spirv{
        ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV),
        ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV),
        ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV),
    };

    for (size_t stage = 0; stage < MaxSMAAStage; ++stage) {
        m_vertex_shaders[stage] = CreateWrappedShaderModule(m_device, vertex_spirv[stage]);
        m_fragment_shaders[stage] = CreateWrappedShaderModule(m_device, fragment_spirv[stage]);
    }
}
void SMAA::CreateDescriptorPool() {
// Edge detection: 1 descriptor
// Blending weight calculation: 3 descriptors
// Neighborhood blending: 2 descriptors
// 6 descriptors, 3 descriptor sets per image
m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 6 * m_image_count, 3 * m_image_count);
}
void SMAA::CreateDescriptorSetLayouts() {
m_descriptor_set_layouts[EdgeDetection] =
CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
m_descriptor_set_layouts[BlendingWeightCalculation] =
CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
m_descriptor_set_layouts[NeighborhoodBlending] =
CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
}
void SMAA::CreateDescriptorSets() {
std::vector<VkDescriptorSetLayout> layouts(m_descriptor_set_layouts.size());
std::ranges::transform(m_descriptor_set_layouts, layouts.begin(),
[](auto& layout) { return *layout; });
for (auto& images : m_dynamic_images) {
images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts);
}
}
/// Creates one pipeline layout per stage from that stage's descriptor set layout.
void SMAA::CreatePipelineLayouts() {
    for (size_t stage = 0; stage < MaxSMAAStage; ++stage) {
        auto& set_layout = m_descriptor_set_layouts[stage];
        m_pipeline_layouts[stage] = CreateWrappedPipelineLayout(m_device, set_layout);
    }
}
void SMAA::CreatePipelines() {
for (size_t i = 0; i < MaxSMAAStage; i++) {
m_pipelines[i] =
CreateWrappedPipeline(m_device, m_renderpasses[i], m_pipeline_layouts[i],
std::tie(m_vertex_shaders[i], m_fragment_shaders[i]));
}
}
/// Rewrites the descriptor sets of all three stages for `image_index` so that they read from
/// `image_view` (the image being anti-aliased) and from this image's intermediates.
///
/// Bindings written:
///  - edge detection:              0 = input image
///  - blending weight calculation: 0 = edges, 1 = area lookup, 2 = search lookup
///  - neighborhood blending:       0 = input image, 1 = blend weights
///
/// @param image_view  View of the image that SMAA is applied to.
/// @param image_index Index selecting the per-image resources.
void SMAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) {
    // Total number of descriptor writes issued below (1 + 3 + 2).
    constexpr size_t num_writes = 6;

    Images& images = m_dynamic_images[image_index];
    std::vector<VkDescriptorImageInfo> image_infos;
    std::vector<VkWriteDescriptorSet> updates;
    // NOTE(review): CreateWriteDescriptorSet receives image_infos by reference and presumably
    // keeps pointers into it, so it must never reallocate while writes are being built - confirm.
    image_infos.reserve(num_writes);
    // This runs on every Draw; reserving up front avoids repeated reallocation while appending.
    updates.reserve(num_writes);

    updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view,
                                               images.descriptor_sets[EdgeDetection], 0));

    updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Edges],
                                               images.descriptor_sets[BlendingWeightCalculation],
                                               0));
    updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Area],
                                               images.descriptor_sets[BlendingWeightCalculation],
                                               1));
    updates.push_back(
        CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Search],
                                 images.descriptor_sets[BlendingWeightCalculation], 2));

    updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view,
                                               images.descriptor_sets[NeighborhoodBlending], 0));
    updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Blend],
                                               images.descriptor_sets[NeighborhoodBlending], 1));

    m_device.GetLogical().UpdateDescriptorSets(updates, {});
}
/// One-time initialisation of image contents: uploads the area and search lookup textures and
/// clears every per-image intermediate. Subsequent calls return immediately.
void SMAA::UploadImages(Scheduler& scheduler) {
    if (m_images_ready) {
        return;
    }

    static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
    static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};

    // Static lookup textures.
    UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent,
                VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes));
    UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
                VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));

    // Clear all intermediates of all images.
    scheduler.Record([this](vk::CommandBuffer cmdbuf) {
        for (auto& frame : m_dynamic_images) {
            for (size_t slot = 0; slot < MaxDynamicImage; ++slot) {
                ClearColorImage(cmdbuf, *frame.images[slot]);
            }
        }
    });
    // Finish is called before returning, so the recorded work does not outlive this call.
    scheduler.Finish();

    m_images_ready = true;
}
/// Records the three SMAA stages for the image selected by `image_index`.
///
/// @param scheduler        Scheduler the commands are recorded on.
/// @param image_index      Index selecting the per-image intermediates, sets and framebuffers.
/// @param inout_image      In: image to anti-alias. Out: this pass's output image.
/// @param inout_image_view In: view of the input image. Out: view of the output image.
void SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image,
VkImageView* inout_image_view) {
Images& images = m_dynamic_images[image_index];
// Copy the raw handles into locals so the recorded lambda captures them by value.
VkImage input_image = *inout_image;
VkImage output_image = *images.images[Output];
VkImage edges_image = *images.images[Edges];
VkImage blend_image = *images.images[Blend];
VkDescriptorSet edge_detection_descriptor_set = images.descriptor_sets[EdgeDetection];
VkDescriptorSet blending_weight_calculation_descriptor_set =
images.descriptor_sets[BlendingWeightCalculation];
VkDescriptorSet neighborhood_blending_descriptor_set =
images.descriptor_sets[NeighborhoodBlending];
VkFramebuffer edge_detection_framebuffer = *images.framebuffers[EdgeDetection];
VkFramebuffer blending_weight_calculation_framebuffer =
*images.framebuffers[BlendingWeightCalculation];
VkFramebuffer neighborhood_blending_framebuffer = *images.framebuffers[NeighborhoodBlending];
// Returns immediately once the static images have been uploaded.
UploadImages(scheduler);
// Point the stage descriptor sets at the current input view before recording.
UpdateDescriptorSets(*inout_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
// Stage 1: edge detection, input image -> edges image.
TransitionImageLayout(cmdbuf, input_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, *m_renderpasses[EdgeDetection], edge_detection_framebuffer,
m_extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[EdgeDetection]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipeline_layouts[EdgeDetection], 0,
edge_detection_descriptor_set, {});
// Three vertices, one instance, no vertex buffer bound here.
// NOTE(review): presumably a full-screen triangle generated in the vertex shader - confirm.
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
// Stage 2: blending weight calculation, edges image -> blend image.
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, *m_renderpasses[BlendingWeightCalculation],
blending_weight_calculation_framebuffer, m_extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipelines[BlendingWeightCalculation]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipeline_layouts[BlendingWeightCalculation], 0,
blending_weight_calculation_descriptor_set, {});
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
// Stage 3: neighborhood blending, input + blend image -> output image.
TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, *m_renderpasses[NeighborhoodBlending],
neighborhood_blending_framebuffer, m_extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[NeighborhoodBlending]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipeline_layouts[NeighborhoodBlending], 0,
neighborhood_blending_descriptor_set, {});
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
});
// Hand the anti-aliased result back to the caller in place of the input.
*inout_image = *images.images[Output];
*inout_image_view = *images.image_views[Output];
}
} // namespace Vulkan

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <array> #include <array>
#include "video_core/renderer_vulkan/present/anti_alias_pass.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"
@ -13,12 +14,14 @@ class Device;
class Scheduler; class Scheduler;
class StagingBufferPool; class StagingBufferPool;
class SMAA { class SMAA final : public AntiAliasPass {
public: public:
explicit SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, explicit SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count,
VkExtent2D extent); VkExtent2D extent);
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, ~SMAA() override;
VkImageView source_image_view);
void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image,
VkImageView* inout_image_view) override;
private: private:
enum SMAAStage { enum SMAAStage {

View file

@ -1,29 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <list>
#include "common/assert.h" #include "common/assert.h"
#include "common/polyfill_ranges.h" #include "common/polyfill_ranges.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_smaa.h"
#include "video_core/smaa_area_tex.h"
#include "video_core/smaa_search_tex.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_frag_spv.h"
#include "video_core/host_shaders/smaa_blending_weight_calculation_vert_spv.h"
#include "video_core/host_shaders/smaa_edge_detection_frag_spv.h"
#include "video_core/host_shaders/smaa_edge_detection_vert_spv.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_frag_spv.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_vert_spv.h"
namespace Vulkan { namespace Vulkan {
namespace {
#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize size, MemoryUsage usage) {
const VkBufferCreateInfo dst_buffer_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = size,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
return allocator.CreateBuffer(dst_buffer_info, usage);
}
vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) {
const VkImageCreateInfo image_ci{ const VkImageCreateInfo image_ci{
@ -48,7 +44,7 @@ vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions,
} }
void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { VkImageLayout source_layout) {
constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT};
const VkImageMemoryBarrier barrier{ const VkImageMemoryBarrier barrier{
@ -75,7 +71,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler,
vk::Image& image, VkExtent2D dimensions, VkFormat format, vk::Image& image, VkExtent2D dimensions, VkFormat format,
std::span<const u8> initial_contents = {}) { std::span<const u8> initial_contents) {
const VkBufferCreateInfo upload_ci = { const VkBufferCreateInfo upload_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
@ -114,6 +110,70 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
scheduler.Finish(); scheduler.Finish();
} }
/// Records a copy of mip 0 / layer 0 of a colour image into `buffer`.
///
/// The image is expected in the GENERAL layout; it is moved to TRANSFER_SRC_OPTIMAL for the copy
/// and back to GENERAL afterwards.
///
/// @param cmdbuf Command buffer to record into.
/// @param image  Source colour image.
/// @param buffer Destination buffer; data is written starting at offset 0, tightly packed.
/// @param extent Extent of the region to copy, starting at the image origin.
void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer,
                        VkExtent3D extent) {
    // Both image barriers cover every mip level and array layer of the colour aspect.
    static constexpr VkImageSubresourceRange whole_color_range{
        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = VK_REMAINING_MIP_LEVELS,
        .baseArrayLayer = 0,
        .layerCount = VK_REMAINING_ARRAY_LAYERS,
    };

    // GENERAL -> TRANSFER_SRC_OPTIMAL, making earlier writes visible to the transfer read.
    const VkImageMemoryBarrier to_transfer_src{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = whole_color_range,
    };

    // TRANSFER_SRC_OPTIMAL -> GENERAL once the copy has been issued.
    const VkImageMemoryBarrier back_to_general{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = 0,
        .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = whole_color_range,
    };

    // Global barrier issued together with the layout restore after the copy.
    static constexpr VkMemoryBarrier after_copy_barrier{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
    };

    const VkBufferImageCopy region{
        .bufferOffset = 0,
        .bufferRowLength = 0,
        .bufferImageHeight = 0,
        .imageSubresource =
            {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .mipLevel = 0,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
        .imageOffset = {.x = 0, .y = 0, .z = 0},
        .imageExtent = extent,
    };

    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                           to_transfer_src);
    cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, region);
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
                           after_copy_barrier, nullptr, back_to_general);
}
vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@ -131,16 +191,18 @@ vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkF
}); });
} }
vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format) { vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format,
VkImageLayout initial_layout) {
const VkAttachmentDescription attachment{ const VkAttachmentDescription attachment{
.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
.format = format, .format = format,
.samples = VK_SAMPLE_COUNT_1_BIT, .samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .loadOp = initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ? VK_ATTACHMENT_LOAD_OP_DONT_CARE
: VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE, .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL, .initialLayout = initial_layout,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL, .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
}; };
@ -200,13 +262,13 @@ vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& r
}); });
} }
vk::Sampler CreateWrappedSampler(const Device& device) { vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter) {
return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ return device.GetLogical().CreateSampler(VkSamplerCreateInfo{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.magFilter = VK_FILTER_LINEAR, .magFilter = filter,
.minFilter = VK_FILTER_LINEAR, .minFilter = filter,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
@ -233,30 +295,34 @@ vk::ShaderModule CreateWrappedShaderModule(const Device& device, std::span<const
}); });
} }
vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, u32 max_descriptors, vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, size_t max_descriptors,
u32 max_sets) { size_t max_sets,
const VkDescriptorPoolSize pool_size{ std::initializer_list<VkDescriptorType> types) {
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, std::vector<VkDescriptorPoolSize> pool_sizes(types.size());
.descriptorCount = static_cast<u32>(max_descriptors), for (u32 i = 0; i < types.size(); i++) {
}; pool_sizes[i] = VkDescriptorPoolSize{
.type = std::data(types)[i],
.descriptorCount = static_cast<u32>(max_descriptors),
};
}
return device.GetLogical().CreateDescriptorPool(VkDescriptorPoolCreateInfo{ return device.GetLogical().CreateDescriptorPool(VkDescriptorPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.maxSets = max_sets, .maxSets = static_cast<u32>(max_sets),
.poolSizeCount = 1, .poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = &pool_size, .pPoolSizes = pool_sizes.data(),
}); });
} }
vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout(const Device& device, vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout(
u32 max_sampler_bindings) { const Device& device, std::initializer_list<VkDescriptorType> types) {
std::vector<VkDescriptorSetLayoutBinding> bindings(max_sampler_bindings); std::vector<VkDescriptorSetLayoutBinding> bindings(types.size());
for (u32 i = 0; i < max_sampler_bindings; i++) { for (size_t i = 0; i < types.size(); i++) {
bindings[i] = { bindings[i] = {
.binding = i, .binding = static_cast<u32>(i),
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorType = std::data(types)[i],
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = nullptr, .pImmutableSamplers = nullptr,
@ -298,7 +364,8 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
vk::PipelineLayout& layout, vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) { std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
bool enable_blending) {
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{ const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
{ {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@ -376,7 +443,7 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
.alphaToOneEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE,
}; };
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment{ constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{
.blendEnable = VK_FALSE, .blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
@ -388,6 +455,18 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
}; };
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_enabled{
.blendEnable = VK_TRUE,
.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
const VkPipelineColorBlendStateCreateInfo color_blend_ci{ const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
@ -395,7 +474,8 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
.logicOpEnable = VK_FALSE, .logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY, .logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = 1, .attachmentCount = 1,
.pAttachments = &color_blend_attachment, .pAttachments =
enable_blending ? &color_blend_attachment_enabled : &color_blend_attachment_disabled,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
}; };
@ -459,6 +539,56 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>
}; };
} }
/// Shared implementation of the presentation samplers below: a non-mipmapped, non-anisotropic
/// sampler that clamps to an opaque black border and uses `filter` for both magnification and
/// minification.
static vk::Sampler CreateClampToBorderSampler(const Device& device, VkFilter filter) {
    const VkSamplerCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .magFilter = filter,
        .minFilter = filter,
        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
        .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
        .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
        .mipLodBias = 0.0f,
        .anisotropyEnable = VK_FALSE,
        .maxAnisotropy = 0.0f,
        .compareEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_NEVER,
        .minLod = 0.0f,
        .maxLod = 0.0f,
        .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
        .unnormalizedCoordinates = VK_FALSE,
    };

    return device.GetLogical().CreateSampler(ci);
}

/// Creates a linear-filtering sampler that clamps to an opaque black border.
vk::Sampler CreateBilinearSampler(const Device& device) {
    return CreateClampToBorderSampler(device, VK_FILTER_LINEAR);
}

/// Creates a nearest-filtering sampler that clamps to an opaque black border.
vk::Sampler CreateNearestNeighborSampler(const Device& device) {
    return CreateClampToBorderSampler(device, VK_FILTER_NEAREST);
}
void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) {
static constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ static constexpr std::array<VkImageSubresourceRange, 1> subresources{{{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
@ -471,12 +601,12 @@ void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) {
cmdbuf.ClearColorImage(image, VK_IMAGE_LAYOUT_GENERAL, {}, subresources); cmdbuf.ClearColorImage(image, VK_IMAGE_LAYOUT_GENERAL, {}, subresources);
} }
void BeginRenderPass(vk::CommandBuffer& cmdbuf, vk::RenderPass& render_pass, void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer,
VkFramebuffer framebuffer, VkExtent2D extent) { VkExtent2D extent) {
const VkRenderPassBeginInfo renderpass_bi{ const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr, .pNext = nullptr,
.renderPass = *render_pass, .renderPass = render_pass,
.framebuffer = framebuffer, .framebuffer = framebuffer,
.renderArea{ .renderArea{
.offset{}, .offset{},
@ -503,248 +633,4 @@ void BeginRenderPass(vk::CommandBuffer& cmdbuf, vk::RenderPass& render_pass,
cmdbuf.SetScissor(0, scissor); cmdbuf.SetScissor(0, scissor);
} }
} // Anonymous namespace
// Builds every object the SMAA passes need, for `image_count` image slots of size `extent`.
// The creation calls below are order-dependent; see the notes on each step.
SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent)
: m_device(device), m_allocator(allocator), m_extent(extent),
m_image_count(static_cast<u32>(image_count)) {
// Populates m_dynamic_images; later steps iterate over it.
CreateImages();
// Also creates the per-image framebuffers, which need the image views made above.
CreateRenderPasses();
CreateSampler();
CreateShaders();
CreateDescriptorPool();
CreateDescriptorSetLayouts();
// Allocates from the pool using the layouts, so both must already exist.
CreateDescriptorSets();
// One pipeline layout per stage, built from that stage's descriptor set layout.
CreatePipelineLayouts();
// Consumes the render passes, pipeline layouts and shader modules created above.
CreatePipelines();
}
void SMAA::CreateImages() {
static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM);
m_static_image_views[Area] =
CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM);
m_static_image_views[Search] =
CreateWrappedImageView(m_device, m_static_images[Search], VK_FORMAT_R8_UNORM);
for (u32 i = 0; i < m_image_count; i++) {
Images& images = m_dynamic_images.emplace_back();
images.images[Blend] =
CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
images.images[Output] =
CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Blend] =
CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Edges] =
CreateWrappedImageView(m_device, images.images[Edges], VK_FORMAT_R16G16_SFLOAT);
images.image_views[Output] =
CreateWrappedImageView(m_device, images.images[Output], VK_FORMAT_R16G16B16A16_SFLOAT);
}
}
void SMAA::CreateRenderPasses() {
m_renderpasses[EdgeDetection] = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16_SFLOAT);
m_renderpasses[BlendingWeightCalculation] =
CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
m_renderpasses[NeighborhoodBlending] =
CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
for (auto& images : m_dynamic_images) {
images.framebuffers[EdgeDetection] = CreateWrappedFramebuffer(
m_device, m_renderpasses[EdgeDetection], images.image_views[Edges], m_extent);
images.framebuffers[BlendingWeightCalculation] =
CreateWrappedFramebuffer(m_device, m_renderpasses[BlendingWeightCalculation],
images.image_views[Blend], m_extent);
images.framebuffers[NeighborhoodBlending] = CreateWrappedFramebuffer(
m_device, m_renderpasses[NeighborhoodBlending], images.image_views[Output], m_extent);
}
}
// Creates the single sampler that UpdateDescriptorSets pairs with every image view it binds.
void SMAA::CreateSampler() {
m_sampler = CreateWrappedSampler(m_device);
}
// Builds the vertex and fragment shader modules for every SMAA stage.
void SMAA::CreateShaders() {
    // Both tables are indexed by SMAAStage, so their entries must stay in enum order
    static constexpr std::array vertex_spirv{
        ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV),
        ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV),
        ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV),
    };
    static constexpr std::array fragment_spirv{
        ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV),
        ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV),
        ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV),
    };

    for (size_t stage = 0; stage < MaxSMAAStage; ++stage) {
        m_vertex_shaders[stage] = CreateWrappedShaderModule(m_device, vertex_spirv[stage]);
        m_fragment_shaders[stage] = CreateWrappedShaderModule(m_device, fragment_spirv[stage]);
    }
}
void SMAA::CreateDescriptorPool() {
// Edge detection: 1 descriptor
// Blending weight calculation: 3 descriptors
// Neighborhood blending: 2 descriptors
// 6 descriptors, 3 descriptor sets per image
m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 6 * m_image_count, 3 * m_image_count);
}
// Creates one descriptor set layout per SMAA stage. The count passed for each stage matches
// the number of descriptors UpdateDescriptorSets writes for it (1, 3 and 2 respectively).
void SMAA::CreateDescriptorSetLayouts() {
    auto& edge_layout = m_descriptor_set_layouts[EdgeDetection];
    auto& weight_layout = m_descriptor_set_layouts[BlendingWeightCalculation];
    auto& neighborhood_layout = m_descriptor_set_layouts[NeighborhoodBlending];

    edge_layout = CreateWrappedDescriptorSetLayout(m_device, 1);
    weight_layout = CreateWrappedDescriptorSetLayout(m_device, 3);
    neighborhood_layout = CreateWrappedDescriptorSetLayout(m_device, 2);
}
void SMAA::CreateDescriptorSets() {
std::vector<VkDescriptorSetLayout> layouts(m_descriptor_set_layouts.size());
std::ranges::transform(m_descriptor_set_layouts, layouts.begin(),
[](auto& layout) { return *layout; });
for (auto& images : m_dynamic_images) {
images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts);
}
}
// Creates a pipeline layout for each SMAA stage from that stage's descriptor set layout.
void SMAA::CreatePipelineLayouts() {
    for (size_t stage = 0; stage < MaxSMAAStage; ++stage) {
        auto& stage_set_layout = m_descriptor_set_layouts[stage];
        m_pipeline_layouts[stage] = CreateWrappedPipelineLayout(m_device, stage_set_layout);
    }
}
// Creates the graphics pipeline for each SMAA stage from that stage's render pass,
// pipeline layout and vertex/fragment shader pair.
void SMAA::CreatePipelines() {
    for (size_t stage = 0; stage < MaxSMAAStage; ++stage) {
        const auto stage_shaders = std::tie(m_vertex_shaders[stage], m_fragment_shaders[stage]);
        m_pipelines[stage] = CreateWrappedPipeline(m_device, m_renderpasses[stage],
                                                   m_pipeline_layouts[stage], stage_shaders);
    }
}
// Points the descriptor sets of slot `image_index` at their inputs:
//   edge detection              <- source image
//   blending weight calculation <- Edges, Area texture, Search texture
//   neighborhood blending       <- source image, Blend
void SMAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) {
    Images& set = m_dynamic_images[image_index];

    std::vector<VkDescriptorImageInfo> image_infos;
    std::vector<VkWriteDescriptorSet> writes;
    // Capacity for all six entries is reserved before any write is created.
    // NOTE(review): presumably the writes keep pointers into image_infos, so it must not
    // reallocate while they are being built - confirm against CreateWriteDescriptorSet.
    image_infos.reserve(6);

    const auto add_write = [&](VkImageView view, VkDescriptorSet target, u32 binding) {
        writes.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, view, target, binding));
    };

    const VkDescriptorSet edge_set = set.descriptor_sets[EdgeDetection];
    const VkDescriptorSet weight_set = set.descriptor_sets[BlendingWeightCalculation];
    const VkDescriptorSet neighborhood_set = set.descriptor_sets[NeighborhoodBlending];

    add_write(image_view, edge_set, 0);

    add_write(*set.image_views[Edges], weight_set, 0);
    add_write(*m_static_image_views[Area], weight_set, 1);
    add_write(*m_static_image_views[Search], weight_set, 2);

    add_write(image_view, neighborhood_set, 0);
    add_write(*set.image_views[Blend], neighborhood_set, 1);

    m_device.GetLogical().UpdateDescriptorSets(writes, {});
}
// One-time initialization of image contents: uploads the Area and Search textures and clears
// every dynamic image. Later calls return immediately once m_images_ready is set.
void SMAA::UploadImages(Scheduler& scheduler) {
    if (m_images_ready) {
        return;
    }

    constexpr VkExtent2D area_size{AREATEX_WIDTH, AREATEX_HEIGHT};
    constexpr VkExtent2D search_size{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
    auto& area_image = m_static_images[Area];
    auto& search_image = m_static_images[Search];

    UploadImage(m_device, m_allocator, scheduler, area_image, area_size, VK_FORMAT_R8G8_UNORM,
                ARRAY_TO_SPAN(areaTexBytes));
    UploadImage(m_device, m_allocator, scheduler, search_image, search_size, VK_FORMAT_R8_UNORM,
                ARRAY_TO_SPAN(searchTexBytes));

    scheduler.Record([&](vk::CommandBuffer cmdbuf) {
        for (auto& set : m_dynamic_images) {
            for (size_t slot = 0; slot < MaxDynamicImage; ++slot) {
                ClearColorImage(cmdbuf, *set.images[slot]);
            }
        }
    });
    // The lambda above captures by reference, and Finish() is called right after recording
    scheduler.Finish();

    m_images_ready = true;
}
// Records the three SMAA passes (edge detection, blending weight calculation, neighborhood
// blending) for slot `image_index`, reading from `source_image_view`, and returns the view of
// that slot's Output image. Work is recorded on the scheduler; nothing is waited on here
// except the one-time upload inside UploadImages.
VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image,
VkImageView source_image_view) {
Images& images = m_dynamic_images[image_index];
// Raw handles are copied into locals so the recorded lambda can capture them by value.
VkImage output_image = *images.images[Output];
VkImage edges_image = *images.images[Edges];
VkImage blend_image = *images.images[Blend];
VkDescriptorSet edge_detection_descriptor_set = images.descriptor_sets[EdgeDetection];
VkDescriptorSet blending_weight_calculation_descriptor_set =
images.descriptor_sets[BlendingWeightCalculation];
VkDescriptorSet neighborhood_blending_descriptor_set =
images.descriptor_sets[NeighborhoodBlending];
VkFramebuffer edge_detection_framebuffer = *images.framebuffers[EdgeDetection];
VkFramebuffer blending_weight_calculation_framebuffer =
*images.framebuffers[BlendingWeightCalculation];
VkFramebuffer neighborhood_blending_framebuffer = *images.framebuffers[NeighborhoodBlending];
// No-op after the first call (guarded by m_images_ready).
UploadImages(scheduler);
// Rebinds this slot's descriptor sets to the current source view.
UpdateDescriptorSets(source_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
// Pass 1: edge detection, source image -> Edges.
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,
m_extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[EdgeDetection]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipeline_layouts[EdgeDetection], 0,
edge_detection_descriptor_set, {});
// Three vertices, no vertex buffer bound here - presumably a full-screen triangle
// generated in the vertex shader (TODO confirm against the shader source).
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
// Pass 2: blending weight calculation, Edges (+ Area/Search textures) -> Blend.
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, m_renderpasses[BlendingWeightCalculation],
blending_weight_calculation_framebuffer, m_extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipelines[BlendingWeightCalculation]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipeline_layouts[BlendingWeightCalculation], 0,
blending_weight_calculation_descriptor_set, {});
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
// Pass 3: neighborhood blending, source image + Blend -> Output.
TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, m_renderpasses[NeighborhoodBlending],
neighborhood_blending_framebuffer, m_extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[NeighborhoodBlending]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS,
*m_pipeline_layouts[NeighborhoodBlending], 0,
neighborhood_blending_descriptor_set, {});
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
// Final transition on the result image before the caller consumes it.
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
});
return *images.image_views[Output];
}
} // namespace Vulkan } // namespace Vulkan

View file

@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
// Shared helpers for the Vulkan presentation code. Only declarations live here; the notes
// below describe what each signature exposes and are hedged where the definition is needed
// to say more.
namespace Vulkan {
// Wraps a C array in a std::span whose length is the array's element count.
// Only meaningful for true arrays: with a pointer argument sizeof(a) would be the pointer size.
#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0])))
// Creates a buffer of `size` bytes from `allocator` with the given memory usage.
vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize size, MemoryUsage usage);
// Creates a 2D image of the given dimensions and format from `allocator`.
vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format);
// Records a layout transition for `image`; the source layout defaults to GENERAL.
void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL);
// Uploads `initial_contents` into `image` using the scheduler. The span defaults to empty.
// NOTE(review): behavior for an empty span is defined in the implementation - confirm there.
void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler,
vk::Image& image, VkExtent2D dimensions, VkFormat format,
std::span<const u8> initial_contents = {});
// Records a copy of `image` (of size `extent`) into `buffer`.
void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer,
VkExtent3D extent);
// Records a clear of `image`.
void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image);
// Creates an image view of `image` with the given format.
vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format);
// Creates a render pass for an attachment of `format`; the attachment's initial layout
// defaults to GENERAL.
vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format,
VkImageLayout initial_layout = VK_IMAGE_LAYOUT_GENERAL);
// Creates a framebuffer for `render_pass` that targets `dest_image` at the given extent.
vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& render_pass,
vk::ImageView& dest_image, VkExtent2D extent);
// Creates a sampler using `filter` (linear by default).
vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter = VK_FILTER_LINEAR);
// Creates a shader module from SPIR-V words.
vk::ShaderModule CreateWrappedShaderModule(const Device& device, std::span<const u32> code);
// Creates a descriptor pool sized by `max_descriptors` and `max_sets`; the descriptor types
// default to a single combined image sampler type.
vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, size_t max_descriptors,
size_t max_sets,
std::initializer_list<VkDescriptorType> types = {
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
// Creates a descriptor set layout from a list of descriptor types.
// NOTE(review): presumably one binding per listed type, in order - confirm in the definition.
vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout(
const Device& device, std::initializer_list<VkDescriptorType> types);
// Allocates descriptor sets from `pool` for the given layouts.
vk::DescriptorSets CreateWrappedDescriptorSets(vk::DescriptorPool& pool,
vk::Span<VkDescriptorSetLayout> layouts);
// Creates a pipeline layout that uses `layout` as its descriptor set layout.
vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
vk::DescriptorSetLayout& layout);
// Creates a graphics pipeline from a pair of shader modules; blending is off by default.
// NOTE(review): callers in this change pass the pair as (vertex, fragment).
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
bool enable_blending = false);
// Builds a descriptor write for `set` at `binding` using `sampler` and `view`.
// NOTE(review): `images` is taken by non-const reference; the returned write presumably
// refers to storage inside it, so keep the vector alive and unmoved until the update is
// submitted - confirm in the definition.
VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images,
VkSampler sampler, VkImageView view,
VkDescriptorSet set, u32 binding);
// Creates a sampler with linear min/mag filtering.
vk::Sampler CreateBilinearSampler(const Device& device);
// Creates a sampler with nearest min/mag filtering.
vk::Sampler CreateNearestNeighborSampler(const Device& device);
// Begins `render_pass` on `framebuffer` over `extent`, taking raw Vulkan handles.
void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer,
VkExtent2D extent);
} // namespace Vulkan

View file

@ -0,0 +1,137 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/frontend/framebuffer_layout.h"
#include "video_core/framebuffer_config.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/renderer_vulkan/present/layer.h"
#include "video_core/renderer_vulkan/present/present_push_constants.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/present/window_adapt_pass.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
namespace Vulkan {
// Takes ownership of the supplied sampler and fragment shader, then builds the remaining
// pipeline objects for a target of format `frame_format`. The creation order matters.
WindowAdaptPass::WindowAdaptPass(const Device& device_, VkFormat frame_format,
vk::Sampler&& sampler_, vk::ShaderModule&& fragment_shader_)
: device(device_), sampler(std::move(sampler_)), fragment_shader(std::move(fragment_shader_)) {
CreateDescriptorSetLayout();
// Needs the descriptor set layout created above.
CreatePipelineLayout();
CreateVertexShader();
CreateRenderPass(frame_format);
// Needs the render pass, the pipeline layout and both shader modules.
CreatePipeline();
}
// Defaulted destructor, defined out of line in this translation unit.
WindowAdaptPass::~WindowAdaptPass() = default;
// Draws every configured layer into dst's framebuffer in a single render pass.
// Each layer is first asked to fill in its push constants and descriptor set; the recorded
// commands then clear the target to the configured background color and issue one draw per
// layer, in the order given by `configs`.
void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, size_t image_index,
                           std::list<Layer>& layers,
                           std::span<const Tegra::FramebufferConfig> configs,
                           const Layout::FramebufferLayout& layout, Frame* dst) {
    // Raw handles are copied into locals so the recorded lambda captures plain values
    const VkFramebuffer target_framebuffer{*dst->framebuffer};
    const VkRenderPass pass_handle{*render_pass};
    const VkPipeline pipeline_handle{*pipeline};
    const VkPipelineLayout layout_handle{*pipeline_layout};
    const VkExtent2D target_extent{
        .width = dst->width,
        .height = dst->height,
    };

    const size_t num_layers = configs.size();
    std::vector<PresentPushConstants> layer_constants(num_layers);
    std::vector<VkDescriptorSet> layer_sets(num_layers);

    // The layer list is walked in lockstep with the configs.
    // NOTE(review): assumes `layers` holds at least configs.size() entries - verify at caller.
    auto current_layer = layers.begin();
    for (size_t index = 0; index < num_layers; ++index, ++current_layer) {
        current_layer->ConfigureDraw(&layer_constants[index], &layer_sets[index], rasterizer,
                                     *sampler, image_index, configs[index], layout);
    }

    scheduler.Record([=](vk::CommandBuffer cmdbuf) {
        // Background color settings are read when the recorded command runs
        const f32 red = Settings::values.bg_red.GetValue() / 255.0f;
        const f32 green = Settings::values.bg_green.GetValue() / 255.0f;
        const f32 blue = Settings::values.bg_blue.GetValue() / 255.0f;

        const VkClearAttachment background_clear{
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .colorAttachment = 0,
            .clearValue =
                {
                    .color = {.float32 = {red, green, blue, 1.0f}},
                },
        };
        const VkClearRect whole_target{
            .rect =
                {
                    .offset = {0, 0},
                    .extent = target_extent,
                },
            .baseArrayLayer = 0,
            .layerCount = 1,
        };

        BeginRenderPass(cmdbuf, pass_handle, target_framebuffer, target_extent);
        cmdbuf.ClearAttachments({background_clear}, {whole_target});
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_handle);

        for (size_t index = 0; index < num_layers; ++index) {
            cmdbuf.PushConstants(layout_handle, VK_SHADER_STAGE_VERTEX_BIT,
                                 layer_constants[index]);
            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout_handle, 0,
                                      layer_sets[index], {});
            cmdbuf.Draw(4, 1, 0, 0);
        }

        cmdbuf.EndRenderPass();
    });
}
// Returns the raw handle of the descriptor set layout owned by this pass.
VkDescriptorSetLayout WindowAdaptPass::GetDescriptorSetLayout() {
return *descriptor_set_layout;
}
// Returns the raw handle of the render pass owned by this pass.
VkRenderPass WindowAdaptPass::GetRenderPass() {
return *render_pass;
}
// Creates the descriptor set layout from a single combined image sampler descriptor type.
void WindowAdaptPass::CreateDescriptorSetLayout() {
descriptor_set_layout =
CreateWrappedDescriptorSetLayout(device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
}
void WindowAdaptPass::CreatePipelineLayout() {
const VkPushConstantRange range{
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.offset = 0,
.size = sizeof(PresentPushConstants),
};
pipeline_layout = device.GetLogical().CreatePipelineLayout(VkPipelineLayoutCreateInfo{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 1,
.pPushConstantRanges = &range,
});
}
// Builds the vertex shader module from the VULKAN_PRESENT_VERT_SPV SPIR-V blob.
void WindowAdaptPass::CreateVertexShader() {
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
}
// Creates the render pass for an attachment of `frame_format`. The initial layout is passed
// as UNDEFINED rather than the helper's GENERAL default.
// NOTE(review): presumably because the pass does not rely on prior attachment contents
// (Draw clears the whole target) - confirm against the helper's load op.
void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) {
render_pass = CreateWrappedRenderPass(device, frame_format, VK_IMAGE_LAYOUT_UNDEFINED);
}
// Creates the graphics pipeline from the render pass, pipeline layout and the
// vertex/fragment shader pair. The trailing `false` is the enable_blending argument.
void WindowAdaptPass::CreatePipeline() {
pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout,
std::tie(vertex_shader, fragment_shader), false);
}
} // namespace Vulkan

View file

@ -0,0 +1,58 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <list>
#include "common/math_util.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace Vulkan {
// Forward declarations; only references and pointers to these types are used in this header.
class Device;
struct Frame;
class Layer;
class Scheduler;
class RasterizerVulkan;
// Final presentation pass: owns a sampler, a vertex/fragment shader pair, a render pass and
// a graphics pipeline, and uses them to draw a list of layers into a destination frame.
class WindowAdaptPass final {
public:
// Takes ownership of `sampler` and `fragment_shader`; `frame_format` is the format the
// render pass is created for.
explicit WindowAdaptPass(const Device& device, VkFormat frame_format, vk::Sampler&& sampler,
vk::ShaderModule&& fragment_shader);
~WindowAdaptPass();
// Records drawing of one layer per entry of `configs` into `dst`.
// `layers` is consumed in order alongside `configs`.
void Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, size_t image_index,
std::list<Layer>& layers, std::span<const Tegra::FramebufferConfig> configs,
const Layout::FramebufferLayout& layout, Frame* dst);
// Raw handles to objects owned by this pass.
VkDescriptorSetLayout GetDescriptorSetLayout();
VkRenderPass GetRenderPass();
private:
// One-time setup steps invoked by the constructor.
void CreateDescriptorSetLayout();
void CreatePipelineLayout();
void CreateVertexShader();
void CreateRenderPass(VkFormat frame_format);
void CreatePipeline();
private:
const Device& device;
vk::DescriptorSetLayout descriptor_set_layout;
vk::PipelineLayout pipeline_layout;
// Moved in by the constructor.
vk::Sampler sampler;
vk::ShaderModule vertex_shader;
// Moved in by the constructor.
vk::ShaderModule fragment_shader;
vk::RenderPass render_pass;
vk::Pipeline pipeline;
};
} // namespace Vulkan

View file

@ -20,12 +20,14 @@
#include "core/frontend/graphics_context.h" #include "core/frontend/graphics_context.h"
#include "core/telemetry_session.h" #include "core/telemetry_session.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/textures/decoders.h"
#include "video_core/vulkan_common/vulkan_debug_callback.h" #include "video_core/vulkan_common/vulkan_debug_callback.h"
#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_instance.h"
@ -97,10 +99,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
render_window.GetFramebufferLayout().height), render_window.GetFramebufferLayout().height),
present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
surface), surface),
blit_screen(device_memory, render_window, device, memory_allocator, swapchain, blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler),
present_manager, scheduler, screen_info), blit_screenshot(device_memory, device, memory_allocator, present_manager, scheduler),
rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator, rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker,
state_tracker, scheduler) { scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
turbo_mode.emplace(instance, dld); turbo_mode.emplace(instance, dld);
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
@ -116,25 +118,22 @@ RendererVulkan::~RendererVulkan() {
void(device.GetLogical().WaitIdle()); void(device.GetLogical().WaitIdle());
} }
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { void RendererVulkan::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) {
if (!framebuffer) { if (framebuffers.empty()) {
return; return;
} }
SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
if (!render_window.IsShown()) { if (!render_window.IsShown()) {
return; return;
} }
// Update screen info if the framebuffer size has changed.
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
RenderScreenshot(*framebuffer, use_accelerated);
RenderScreenshot(framebuffers);
Frame* frame = present_manager.GetRenderFrame(); Frame* frame = present_manager.GetRenderFrame();
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated); blit_swapchain.DrawToFrame(rasterizer, frame, framebuffers,
render_window.GetFramebufferLayout(), swapchain.GetImageCount(),
swapchain.GetImageViewFormat());
scheduler.Flush(*frame->render_ready); scheduler.Flush(*frame->render_ready);
present_manager.Present(frame); present_manager.Present(frame);
@ -168,143 +167,37 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
} }
void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, void Vulkan::RendererVulkan::RenderScreenshot(
bool use_accelerated) { std::span<const Tegra::FramebufferConfig> framebuffers) {
if (!renderer_settings.screenshot_requested) { if (!renderer_settings.screenshot_requested) {
return; return;
} }
constexpr VkFormat ScreenshotFormat{VK_FORMAT_B8G8R8A8_UNORM};
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
.imageType = VK_IMAGE_TYPE_2D,
.format = VK_FORMAT_B8G8R8A8_UNORM,
.extent =
{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ auto frame = [&]() {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, Frame f{};
.pNext = nullptr, f.image = CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height},
.flags = 0, ScreenshotFormat);
.image = *staging_image, f.image_view = CreateWrappedImageView(device, f.image, ScreenshotFormat);
.viewType = VK_IMAGE_VIEW_TYPE_2D, f.framebuffer = blit_screenshot.CreateFramebuffer(layout, *f.image_view, ScreenshotFormat);
.format = VK_FORMAT_B8G8R8A8_UNORM, return f;
.components{ }();
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated);
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1,
const VkBufferCreateInfo dst_buffer_info{ VK_FORMAT_B8G8R8A8_UNORM);
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr, const auto dst_buffer = CreateWrappedBuffer(
.flags = 0, memory_allocator, static_cast<VkDeviceSize>(layout.width * layout.height * 4),
.size = buffer_size, MemoryUsage::Download);
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
const vk::Buffer dst_buffer =
memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) { scheduler.Record([&](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{ DownloadColorImage(cmdbuf, *frame.image, *dst_buffer,
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, VkExtent3D{layout.width, layout.height, 1});
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
static constexpr VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkBufferImageCopy copy{
.bufferOffset = 0,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset{.x = 0, .y = 0, .z = 0},
.imageExtent{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
}); });
// Ensure the copy is fully completed before saving the screenshot // Ensure the copy is fully completed before saving the screenshot
scheduler.Finish(); scheduler.Finish();

View file

@ -46,7 +46,7 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_); std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererVulkan() override; ~RendererVulkan() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override;
VideoCore::RasterizerInterface* ReadRasterizer() override { VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer; return &rasterizer;
@ -59,7 +59,7 @@ public:
private: private:
void Report() const; void Report() const;
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
Core::TelemetrySession& telemetry_session; Core::TelemetrySession& telemetry_session;
Tegra::MaxwellDeviceMemoryManager& device_memory; Tegra::MaxwellDeviceMemoryManager& device_memory;
@ -72,15 +72,14 @@ private:
vk::DebugUtilsMessenger debug_messenger; vk::DebugUtilsMessenger debug_messenger;
vk::SurfaceKHR surface; vk::SurfaceKHR surface;
ScreenInfo screen_info;
Device device; Device device;
MemoryAllocator memory_allocator; MemoryAllocator memory_allocator;
StateTracker state_tracker; StateTracker state_tracker;
Scheduler scheduler; Scheduler scheduler;
Swapchain swapchain; Swapchain swapchain;
PresentManager present_manager; PresentManager present_manager;
BlitScreen blit_screen; BlitScreen blit_swapchain;
BlitScreen blit_screenshot;
RasterizerVulkan rasterizer; RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode; std::optional<TurboMode> turbo_mode;
}; };

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,12 @@
#pragma once #pragma once
#include <list>
#include <memory> #include <memory>
#include "core/frontend/framebuffer_layout.h" #include "core/frontend/framebuffer_layout.h"
#include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/renderer_vulkan/present/layer.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"
@ -14,155 +16,67 @@ namespace Core {
class System; class System;
} }
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra { namespace Tegra {
struct FramebufferConfig; struct FramebufferConfig;
} }
namespace VideoCore { namespace Settings {
class RasterizerInterface; enum class ScalingFilter : u32;
} } // namespace Settings
namespace Service::android {
enum class PixelFormat : u32;
}
namespace Vulkan { namespace Vulkan {
struct ScreenInfo;
class Device; class Device;
class FSR;
class RasterizerVulkan; class RasterizerVulkan;
class Scheduler; class Scheduler;
class SMAA;
class Swapchain;
class PresentManager; class PresentManager;
class WindowAdaptPass;
struct Frame; struct Frame;
struct ScreenInfo { struct FramebufferTextureInfo {
VkImage image{}; VkImage image{};
VkImageView image_view{}; VkImageView image_view{};
u32 width{}; u32 width{};
u32 height{}; u32 height{};
u32 scaled_width{};
u32 scaled_height{};
}; };
class BlitScreen { class BlitScreen {
public: public:
explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, const Device& device,
Core::Frontend::EmuWindow& render_window, const Device& device, MemoryAllocator& memory_allocator, PresentManager& present_manager,
MemoryAllocator& memory_manager, Swapchain& swapchain, Scheduler& scheduler);
PresentManager& present_manager, Scheduler& scheduler,
const ScreenInfo& screen_info);
~BlitScreen(); ~BlitScreen();
void Recreate(); void DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame,
std::span<const Tegra::FramebufferConfig> framebuffers,
const Layout::FramebufferLayout& layout, size_t current_swapchain_image_count,
VkFormat current_swapchain_view_format);
void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer, [[nodiscard]] vk::Framebuffer CreateFramebuffer(const Layout::FramebufferLayout& layout,
const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated); VkImageView image_view,
VkFormat current_view_format);
void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent, vk::RenderPass& rd);
private: private:
struct BufferData; void WaitIdle();
void SetWindowAdaptPass();
void CreateStaticResources(); vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
void CreateShaders(); VkRenderPass render_pass);
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(VkFormat format);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
void CreateGraphicsPipeline();
void CreateSampler();
void CreateDynamicResources();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
void CreateSMAA(VkExtent2D smaa_size);
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
Tegra::MaxwellDeviceMemoryManager& device_memory; Tegra::MaxwellDeviceMemoryManager& device_memory;
Core::Frontend::EmuWindow& render_window;
const Device& device; const Device& device;
MemoryAllocator& memory_allocator; MemoryAllocator& memory_allocator;
Swapchain& swapchain;
PresentManager& present_manager; PresentManager& present_manager;
Scheduler& scheduler; Scheduler& scheduler;
std::size_t image_count; std::size_t image_count{};
std::size_t image_index{}; std::size_t image_index{};
const ScreenInfo& screen_info; VkFormat swapchain_view_format{};
vk::ShaderModule vertex_shader; Settings::ScalingFilter scaling_filter{};
vk::ShaderModule fxaa_vertex_shader; std::unique_ptr<WindowAdaptPass> window_adapt{};
vk::ShaderModule fxaa_fragment_shader; std::list<Layer> layers{};
vk::ShaderModule bilinear_fragment_shader;
vk::ShaderModule bicubic_fragment_shader;
vk::ShaderModule gaussian_fragment_shader;
vk::ShaderModule scaleforce_fragment_shader;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::PipelineLayout pipeline_layout;
vk::Pipeline nearest_neighbor_pipeline;
vk::Pipeline bilinear_pipeline;
vk::Pipeline bicubic_pipeline;
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
vk::Buffer buffer;
std::vector<u64> resource_ticks;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
vk::PipelineLayout aa_pipeline_layout;
vk::Pipeline aa_pipeline;
vk::RenderPass aa_renderpass;
vk::Framebuffer aa_framebuffer;
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
u32 raw_width = 0;
u32 raw_height = 0;
Service::android::PixelFormat pixel_format{};
VkFormat framebuffer_view_format;
VkFormat swapchain_view_format;
std::unique_ptr<FSR> fsr;
std::unique_ptr<SMAA> smaa;
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -1,420 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/settings.h"
#include "video_core/fsr.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
#include "video_core/renderer_vulkan/vk_fsr.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
using namespace FSR;
/// Constructs the FSR pass and eagerly creates every Vulkan object it uses.
/// @param device_ Device used to create the Vulkan objects.
/// @param memory_allocator_ Allocator used for the intermediate images.
/// @param image_count_ Number of image indices; two images and two descriptor sets are created
///                     per index.
/// @param output_size_ Extent of the intermediate images and of the compute dispatches.
FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
VkExtent2D output_size_)
: device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_},
output_size{output_size_} {
CreateImages();
// The sampler must exist before the descriptor set layout, which references it as an
// immutable sampler.
CreateSampler();
CreateShaders();
CreateDescriptorPool();
CreateDescriptorSetLayout();
// Descriptor sets are allocated from the pool using the layout created just above.
CreateDescriptorSets();
// The pipeline layout references the descriptor set layout.
CreatePipelineLayout();
// The pipelines need both the shader modules and the pipeline layout.
CreatePipeline();
}
/// Records the two FSR compute passes (EASU, then RCAS) for one image index.
/// The EASU pass samples `image_view` and writes images[image_index]; the RCAS pass samples that
/// image and writes images[image_count + image_index].
/// @param scheduler Scheduler the commands are recorded on.
/// @param image_index Index selecting the intermediate images and descriptor sets to use.
/// @param image_view View of the source image sampled by the EASU pass.
/// @param input_image_extent Size of the source image in pixels.
/// @param crop_rect Region of the source image to scale; it is multiplied by the input extent
///                  below, so it is presumably expressed in normalized coordinates (TODO confirm).
/// @return View of the RCAS output image for `image_index`.
VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) {
// Point both descriptor sets for this index at the current source and intermediate views.
UpdateDescriptorSet(image_index, image_view);
scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
// Template barrier: GENERAL -> GENERAL on a single color mip/layer. Each use below copies it
// and fills in the image plus whichever fields differ.
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = {},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
// --- Pass 1: EASU ---
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
const f32 input_image_width = static_cast<f32>(input_image_extent.width);
const f32 input_image_height = static_cast<f32>(input_image_extent.height);
const f32 output_image_width = static_cast<f32>(output_size.width);
const f32 output_image_height = static_cast<f32>(output_size.height);
// Convert the crop rectangle into a pixel-space viewport inside the source image.
const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width;
const f32 viewport_x = crop_rect.left * input_image_width;
const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height;
const f32 viewport_y = crop_rect.top * input_image_height;
// 16 u32 push constants, handed to the FSR helper as four groups of four.
std::array<u32, 4 * 4> push_constants;
FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4,
push_constants.data() + 8, push_constants.data() + 12,
viewport_width, viewport_height, input_image_width, input_image_height,
output_image_width, output_image_height, viewport_x, viewport_y);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{
// Transition the EASU output from UNDEFINED (previous contents are not preserved) to
// GENERAL before the compute shader writes it.
// NOTE(review): both access masks stay 0 for this transition — confirm that is
// sufficient for the shader write that follows.
VkImageMemoryBarrier fsr_write_barrier = base_barrier;
fsr_write_barrier.image = *images[image_index];
fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier);
}
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
descriptor_sets[image_index * 2], {});
// Workgroup count is the output size divided by 16, rounded up (presumably matching the
// shader's workgroup footprint — verify against the shader).
cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
Common::DivCeil(output_size.height, 16u), 1);
// --- Pass 2: RCAS ---
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
// The sharpening setting is stored as a percentage; convert it to a fraction.
const float sharpening =
static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
// The same push-constant array is reused for the RCAS constants.
FsrRcasCon(push_constants.data(), sharpening);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{
std::array<VkImageMemoryBarrier, 2> barriers;
auto& fsr_read_barrier = barriers[0];
auto& blit_write_barrier = barriers[1];
// Make the EASU shader writes visible to the RCAS shader reads.
fsr_read_barrier = base_barrier;
fsr_read_barrier.image = *images[image_index];
fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
// Transition the RCAS output from UNDEFINED to GENERAL before it is written.
blit_write_barrier = base_barrier;
blit_write_barrier.image = *images[image_count + image_index];
blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers);
}
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
descriptor_sets[image_index * 2 + 1], {});
cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
Common::DivCeil(output_size.height, 16u), 1);
{
// Make the RCAS shader writes visible to fragment-shader reads of the final image.
std::array<VkImageMemoryBarrier, 1> barriers;
auto& blit_read_barrier = barriers[0];
blit_read_barrier = base_barrier;
blit_read_barrier.image = *images[image_count + image_index];
blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers);
}
});
// The RCAS output views live in the second half of image_views.
return *image_views[image_count + image_index];
}
void FSR::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = static_cast<u32>(image_count * 2),
},
{
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = static_cast<u32>(image_count * 2),
},
}};
const VkDescriptorPoolCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.maxSets = static_cast<u32>(image_count * 2),
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data(),
};
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
}
/// Creates the descriptor set layout shared by both FSR compute passes.
/// Binding 0 is the sampled input (combined image sampler using the immutable `sampler`), and
/// binding 1 is the storage image the pass writes to. CreateSampler() must have run first.
void FSR::CreateDescriptorSetLayout() {
    const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
        {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
            .pImmutableSamplers = sampler.address(),
        },
        {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
            // Immutable samplers only apply to sampler-type descriptors; Vulkan ignores this
            // field for storage images, so leave it null rather than pointing at the sampler.
            .pImmutableSamplers = nullptr,
        },
    }};
    const VkDescriptorSetLayoutCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = static_cast<u32>(layout_bindings.size()),
        .pBindings = layout_bindings.data(),
    };
    descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
}
void FSR::CreateDescriptorSets() {
const u32 sets = static_cast<u32>(image_count * 2);
const std::vector layouts(sets, *descriptor_set_layout);
const VkDescriptorSetAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *descriptor_pool,
.descriptorSetCount = sets,
.pSetLayouts = layouts.data(),
};
descriptor_sets = descriptor_pool.Allocate(ai);
}
void FSR::CreateImages() {
images.resize(image_count * 2);
image_views.resize(image_count * 2);
for (size_t i = 0; i < image_count * 2; ++i) {
images[i] = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
.extent =
{
.width = output_size.width,
.height = output_size.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *images[i],
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
}
}
void FSR::CreatePipelineLayout() {
VkPushConstantRange push_const{
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = sizeof(std::array<u32, 4 * 4>),
};
VkPipelineLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_const,
};
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
}
void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
const auto fsr_image_view = *image_views[image_index];
const auto blit_image_view = *image_views[image_count + image_index];
const VkDescriptorImageInfo image_info{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkDescriptorImageInfo fsr_image_info{
.sampler = VK_NULL_HANDLE,
.imageView = fsr_image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkDescriptorImageInfo blit_image_info{
.sampler = VK_NULL_HANDLE,
.imageView = blit_image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkWriteDescriptorSet sampler_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_sets[image_index * 2],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
VkWriteDescriptorSet output_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_sets[image_index * 2],
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.pImageInfo = &fsr_image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
sampler_write.dstSet = descriptor_sets[image_index * 2 + 1];
sampler_write.pImageInfo = &fsr_image_info;
output_write.dstSet = descriptor_sets[image_index * 2 + 1];
output_write.pImageInfo = &blit_image_info;
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
}
void FSR::CreateSampler() {
const VkSamplerCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_LINEAR,
.minFilter = VK_FILTER_LINEAR,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_NEVER,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
.unnormalizedCoordinates = VK_FALSE,
};
sampler = device.GetLogical().CreateSampler(ci);
}
/// Builds the EASU and RCAS compute shader modules, picking the FP16 SPIR-V variants when the
/// device reports float16 support and the FP32 variants otherwise.
void FSR::CreateShaders() {
    if (!device.IsFloat16Supported()) {
        // No float16 support: use the FP32 builds of both shaders.
        easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
        rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
        return;
    }
    easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
    rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
}
void FSR::CreatePipeline() {
VkPipelineShaderStageCreateInfo shader_stage_easu{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *easu_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
};
VkPipelineShaderStageCreateInfo shader_stage_rcas{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *rcas_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
};
VkComputePipelineCreateInfo pipeline_ci_easu{
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = shader_stage_easu,
.layout = *pipeline_layout,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
};
VkComputePipelineCreateInfo pipeline_ci_rcas{
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = shader_stage_rcas,
.layout = *pipeline_layout,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
};
easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu);
rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas);
}
} // namespace Vulkan

View file

@ -1,52 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/math_util.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class Scheduler;
/// Vulkan implementation of the two-pass FSR upscaler (EASU followed by RCAS), run as compute
/// shaders. All Vulkan objects are created in the constructor.
class FSR {
public:
/// @param device Device used to create the Vulkan objects.
/// @param memory_allocator Allocator used for the intermediate images.
/// @param image_count Number of image indices; two images and two descriptor sets are created
///                    per index.
/// @param output_size Extent of the intermediate images and of the compute dispatches.
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
VkExtent2D output_size);
/// Records both passes for `image_index` on the scheduler and returns the view of the final
/// (RCAS) output image for that index.
/// @param image_view View of the source image sampled by the first pass.
/// @param input_image_extent Size of the source image in pixels.
/// @param crop_rect Region of the source image to scale.
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect);
private:
// One-time setup helpers, all invoked from the constructor.
void CreateDescriptorPool();
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreateImages();
void CreateSampler();
void CreateShaders();
void CreatePipeline();
void CreatePipelineLayout();
// Points the two descriptor sets of `image_index` at the given source view and the
// intermediate images; called at the start of every Draw().
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
const Device& device;
MemoryAllocator& memory_allocator;
size_t image_count;
VkExtent2D output_size;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
// image_count * 2 sets: [2 * i] is used by the EASU pass, [2 * i + 1] by the RCAS pass.
vk::DescriptorSets descriptor_sets;
vk::PipelineLayout pipeline_layout;
vk::ShaderModule easu_shader;
vk::ShaderModule rcas_shader;
vk::Pipeline easu_pipeline;
vk::Pipeline rcas_pipeline;
vk::Sampler sampler;
// image_count * 2 entries: [i] is the EASU output, [image_count + i] is the RCAS output.
std::vector<vk::Image> images;
std::vector<vk::ImageView> image_views;
};
} // namespace Vulkan

View file

@ -165,10 +165,9 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::MaxwellDeviceMemoryManager& device_memory_,
ScreenInfo& screen_info_, const Device& device_, const Device& device_, MemoryAllocator& memory_allocator_,
MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, StateTracker& state_tracker_, Scheduler& scheduler_)
Scheduler& scheduler_) : gpu{gpu_}, device_memory{device_memory_}, device{device_},
: gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_},
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
@ -783,23 +782,29 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
query_cache.InvalidateRegion(*cpu_addr, copy_size); query_cache.InvalidateRegion(*cpu_addr, copy_size);
} }
bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay(
DAddr framebuffer_addr, u32 pixel_stride) { const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) { if (!framebuffer_addr) {
return false; return {};
} }
std::scoped_lock lock{texture_cache.mutex}; std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view = const auto [image_view, scaled] =
texture_cache.TryFindFramebufferImageView(config, framebuffer_addr); texture_cache.TryFindFramebufferImageView(config, framebuffer_addr);
if (!image_view) { if (!image_view) {
return false; return {};
} }
query_cache.NotifySegment(false); query_cache.NotifySegment(false);
screen_info.image = image_view->ImageHandle();
screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); const auto& resolution = Settings::values.resolution_info;
screen_info.width = image_view->size.width;
screen_info.height = image_view->size.height; FramebufferTextureInfo info{};
return true; info.image = image_view->ImageHandle();
info.image_view = image_view->Handle(Shader::TextureType::Color2D);
info.width = image_view->size.width;
info.height = image_view->size.height;
info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width;
info.scaled_height = scaled ? resolution.ScaleUp(info.height) : info.height;
return info;
} }
void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,

View file

@ -43,7 +43,7 @@ class Maxwell3D;
namespace Vulkan { namespace Vulkan {
struct ScreenInfo; struct FramebufferTextureInfo;
class StateTracker; class StateTracker;
@ -78,9 +78,8 @@ class RasterizerVulkan final : public VideoCore::RasterizerInterface,
public: public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::MaxwellDeviceMemoryManager& device_memory_,
ScreenInfo& screen_info_, const Device& device_, const Device& device_, MemoryAllocator& memory_allocator_,
MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, StateTracker& state_tracker_, Scheduler& scheduler_);
Scheduler& scheduler_);
~RasterizerVulkan() override; ~RasterizerVulkan() override;
void Draw(bool is_indexed, u32 instance_count) override; void Draw(bool is_indexed, u32 instance_count) override;
@ -126,8 +125,6 @@ public:
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) override; std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading, void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override; const VideoCore::DiskResourceLoadCallback& callback) override;
@ -137,6 +134,10 @@ public:
void ReleaseChannel(s32 channel_id) override; void ReleaseChannel(s32 channel_id) override;
std::optional<FramebufferTextureInfo> AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr,
u32 pixel_stride);
private: private:
static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGES = 48;
@ -182,7 +183,6 @@ private:
Tegra::GPU& gpu; Tegra::GPU& gpu;
Tegra::MaxwellDeviceMemoryManager& device_memory; Tegra::MaxwellDeviceMemoryManager& device_memory;
ScreenInfo& screen_info;
const Device& device; const Device& device;
MemoryAllocator& memory_allocator; MemoryAllocator& memory_allocator;
StateTracker& state_tracker; StateTracker& state_tracker;

View file

@ -713,12 +713,12 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
} }
template <class P> template <class P>
typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( std::pair<typename P::ImageView*, bool> TextureCache<P>::TryFindFramebufferImageView(
const Tegra::FramebufferConfig& config, DAddr cpu_addr) { const Tegra::FramebufferConfig& config, DAddr cpu_addr) {
// TODO: Properly implement this // TODO: Properly implement this
const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS);
if (it == page_table.end()) { if (it == page_table.end()) {
return nullptr; return {};
} }
const auto& image_map_ids = it->second; const auto& image_map_ids = it->second;
boost::container::small_vector<ImageId, 4> valid_image_ids; boost::container::small_vector<ImageId, 4> valid_image_ids;
@ -747,7 +747,8 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(
const auto GetImageViewForFramebuffer = [&](ImageId image_id) { const auto GetImageViewForFramebuffer = [&](ImageId image_id) {
const ImageViewInfo info{ImageViewType::e2D, view_format}; const ImageViewInfo info{ImageViewType::e2D, view_format};
return &slot_image_views[FindOrEmplaceImageView(image_id, info)]; return std::make_pair(&slot_image_views[FindOrEmplaceImageView(image_id, info)],
slot_images[image_id].IsRescaled());
}; };
if (valid_image_ids.size() == 1) [[likely]] { if (valid_image_ids.size() == 1) [[likely]] {
@ -761,7 +762,7 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(
return GetImageViewForFramebuffer(*most_recent); return GetImageViewForFramebuffer(*most_recent);
} }
return nullptr; return {};
} }
template <class P> template <class P>

View file

@ -212,8 +212,8 @@ public:
const Tegra::Engines::Fermi2D::Config& copy); const Tegra::Engines::Fermi2D::Config& copy);
/// Try to find a cached image view in the given CPU address /// Try to find a cached image view in the given CPU address
[[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, [[nodiscard]] std::pair<ImageView*, bool> TryFindFramebufferImageView(
DAddr cpu_addr); const Tegra::FramebufferConfig& config, DAddr cpu_addr);
/// Return true when there are uncommitted images to be downloaded /// Return true when there are uncommitted images to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept; [[nodiscard]] bool HasUncommittedFlushes() const noexcept;