From 2c27127d04a155fe0f893e84263d58f14473785d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Dec 2020 01:02:06 -0500 Subject: [PATCH 1/3] nvdec syncpt incorporation laying the groundwork for async gpu, although this does not fully implement async nvdec operations --- .../service/nvdrv/devices/nvhost_nvdec.cpp | 5 ++-- .../hle/service/nvdrv/devices/nvhost_nvdec.h | 3 ++- .../nvdrv/devices/nvhost_nvdec_common.cpp | 26 ++++++++++++++----- .../nvdrv/devices/nvhost_nvdec_common.h | 14 +++++++--- .../hle/service/nvdrv/devices/nvhost_vic.cpp | 5 ++-- .../hle/service/nvdrv/devices/nvhost_vic.h | 4 +-- src/core/hle/service/nvdrv/nvdrv.cpp | 6 +++-- src/video_core/cdma_pusher.cpp | 15 +++++------ src/video_core/cdma_pusher.h | 10 +++---- src/video_core/command_classes/host1x.cpp | 6 +++-- .../command_classes/sync_manager.cpp | 2 +- 11 files changed, 59 insertions(+), 37 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index d8735491cb..36970f828c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -11,8 +11,9 @@ namespace Service::Nvidia::Devices { -nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev) - : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} nvhost_nvdec::~nvhost_nvdec() = default; NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector& input, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 79b8b6de12..77ef53cdd4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices { class nvhost_nvdec final : public nvhost_nvdec_common { public: - explicit nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev); + explicit nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_nvdec() override; NvResult Ioctl1(Ioctl command, const std::vector& input, std::vector& output) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index b49cecb426..64370ad4c7 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -11,6 +11,7 @@ #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" #include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/memory.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" @@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector& dst, const std::vector& src, std::s } } // Anonymous namespace -nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev) - : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {} nvhost_nvdec_common::~nvhost_nvdec_common() = default; NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector& input) { @@ -71,10 +73,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vectorGetObject(cmd_buffer.memory_id); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); @@ -89,6 +95,10 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector& input, std::vector& input, std::ve std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); - // We found that implementing this causes deadlocks with async gpu, along with degraded - // performance. TODO: RE the nvdec async implementation - params.value = 0; + if (device_syncpoints[params.param] == 0) { + device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); + } + params.value = device_syncpoints[params.param]; std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index d9f95ba58e..4c9d4ba416 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -10,12 +10,16 @@ #include "common/swap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" -namespace Service::Nvidia::Devices { +namespace Service::Nvidia { +class SyncpointManager; + +namespace Devices { class nvmap; class nvhost_nvdec_common : public nvdevice { public: - explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev); + explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_nvdec_common() override; protected: @@ -157,8 +161,10 @@ protected: s32_le nvmap_fd{}; u32_le submit_timeout{}; std::shared_ptr nvmap_dev; - + SyncpointManager& syncpoint_manager; + std::array device_syncpoints{}; // This is expected to be ordered, therefore we must use a map, not unordered_map std::map buffer_mappings; }; -}; // namespace Service::Nvidia::Devices +}; // namespace Devices +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 805fe86ae6..72499654c7 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -10,8 +10,9 @@ #include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev) - : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} nvhost_vic::~nvhost_vic() = default; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index b2e11f4d4b..f401c61fa5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -7,11 +7,11 @@ #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { -class nvmap; class nvhost_vic final : public nvhost_nvdec_common { public: - explicit nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev); + explicit nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_vic(); NvResult Ioctl1(Ioctl command, const std::vector& input, std::vector& output) override; diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index e03195afe4..620c187282 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { devices["/dev/nvdisp_disp0"] = std::make_shared(system, nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared(system, events_interface, syncpoint_manager); - devices["/dev/nvhost-nvdec"] = std::make_shared(system, nvmap_dev); + devices["/dev/nvhost-nvdec"] = + std::make_shared(system, nvmap_dev, syncpoint_manager); devices["/dev/nvhost-nvjpg"] = std::make_shared(system); - devices["/dev/nvhost-vic"] = std::make_shared(system, nvmap_dev); + devices["/dev/nvhost-vic"] = + std::make_shared(system, nvmap_dev, syncpoint_manager); } Module::~Module() = default; diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index e3e7432f74..94679d5d16 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_) : gpu{gpu_}, nvdec_processor(std::make_shared(gpu)), vic_processor(std::make_unique(gpu, nvdec_processor)), host1x_processor(std::make_unique(gpu)), - nvdec_sync(std::make_unique(gpu)), - vic_sync(std::make_unique(gpu)) {} + sync_manager(std::make_unique(gpu)) {} CDmaPusher::~CDmaPusher() = default; @@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); if (cond == 0) { - nvdec_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - nvdec_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); - nvdec_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast(current_class), syncpoint_id)); } break; } @@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); if (cond == 0) { - vic_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - vic_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); - vic_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast(current_class), syncpoint_id)); } break; } diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 0db1cd6467..8ca70b6dd2 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -116,12 +116,10 @@ private: void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector& arguments); GPU& gpu; - - std::shared_ptr nvdec_processor; - std::unique_ptr vic_processor; - std::unique_ptr host1x_processor; - std::unique_ptr nvdec_sync; - std::unique_ptr vic_sync; + std::shared_ptr nvdec_processor; + std::unique_ptr vic_processor; + std::unique_ptr host1x_processor; + std::unique_ptr sync_manager; ChClassId current_class{}; ThiRegisters vic_thi_state{}; ThiRegisters nvdec_thi_state{}; diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index c4dd4881ae..9d0a1b4d94 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -34,6 +34,8 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector& argumen } void Tegra::Host1x::Execute(u32 data) { - // This method waits on a valid syncpoint. - // TODO: Implement when proper Async is in place + u32 syncpointId = (data & 0xFF); + u32 threshold = state.load_syncpoint_payload32; + + gpu.WaitFence(syncpointId, threshold); } diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp index 19dc9e0abb..5798577663 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/command_classes/sync_manager.cpp @@ -38,7 +38,7 @@ u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { } void SyncptIncrManager::SignalDone(u32 handle) { - const auto done_incr = + const auto& done_incr = std::find_if(increments.begin(), increments.end(), [handle](const SyncptIncr& incr) { return incr.id == handle; }); if (done_incr != increments.cend()) { From 06cef3355e415be83db3bc6d19b022de0b977580 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Dec 2020 01:21:41 -0500 Subject: [PATCH 2/3] fix for nvdec disabled, cleanup host1x --- .../nvdrv/devices/nvhost_nvdec_common.cpp | 25 +++++----- src/video_core/command_classes/host1x.cpp | 21 ++------ src/video_core/command_classes/host1x.h | 49 ++----------------- 3 files changed, 23 insertions(+), 72 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 64370ad4c7..4898dc27a8 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -73,14 +73,15 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vectorGetObject(cmd_buffer.memory_id); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); @@ -95,11 +96,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector& input, std::ve std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); - if (device_syncpoints[params.param] == 0) { + if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) { device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); } params.value = device_syncpoints[params.param]; diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index 9d0a1b4d94..b124945287 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {} Tegra::Host1x::~Host1x() = default; -void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { - u8* const state_offset = reinterpret_cast(&state) + offset * sizeof(u32); - std::memcpy(state_offset, &arguments, sizeof(u32)); -} - -void Tegra::Host1x::ProcessMethod(Method method, const std::vector& arguments) { - StateWrite(static_cast(method), arguments[0]); +void Tegra::Host1x::ProcessMethod(Method method, u32 argument) { switch (method) { - case Method::WaitSyncpt: - Execute(arguments[0]); - break; case Method::LoadSyncptPayload32: - syncpoint_value = arguments[0]; + syncpoint_value = argument; break; + case Method::WaitSyncpt: case Method::WaitSyncpt32: - Execute(arguments[0]); + Execute(argument); break; default: UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast(method)); @@ -34,8 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector& argumen } void Tegra::Host1x::Execute(u32 data) { - u32 syncpointId = (data & 0xFF); - u32 threshold = state.load_syncpoint_payload32; - - gpu.WaitFence(syncpointId, threshold); + gpu.WaitFence(data, syncpoint_value); } diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h index 013eaa0c12..7e94799dd5 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/command_classes/host1x.h @@ -14,64 +14,23 @@ class Nvdec; class Host1x { public: - struct Host1xClassRegisters { - u32 incr_syncpt{}; - u32 incr_syncpt_ctrl{}; - u32 incr_syncpt_error{}; - INSERT_PADDING_WORDS(5); - u32 wait_syncpt{}; - u32 wait_syncpt_base{}; - u32 wait_syncpt_incr{}; - u32 load_syncpt_base{}; - u32 incr_syncpt_base{}; - u32 clear{}; - u32 wait{}; - u32 wait_with_interrupt{}; - u32 delay_use{}; - u32 tick_count_high{}; - u32 tick_count_low{}; - u32 tick_ctrl{}; - INSERT_PADDING_WORDS(23); - u32 ind_ctrl{}; - u32 ind_off2{}; - u32 ind_off{}; - std::array ind_data{}; - INSERT_PADDING_WORDS(1); - u32 load_syncpoint_payload32{}; - u32 stall_ctrl{}; - u32 wait_syncpt32{}; - u32 wait_syncpt_base32{}; - u32 load_syncpt_base32{}; - u32 incr_syncpt_base32{}; - u32 stall_count_high{}; - u32 stall_count_low{}; - u32 xref_ctrl{}; - u32 channel_xref_high{}; - u32 channel_xref_low{}; - }; - static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size"); - enum class Method : u32 { - WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, - LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, - WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, + WaitSyncpt = 0x8, + LoadSyncptPayload32 = 0x4e, + WaitSyncpt32 = 0x50, }; explicit Host1x(GPU& gpu); ~Host1x(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, const std::vector& arguments); + void ProcessMethod(Method method, u32 argument); private: /// For Host1x, execute is waiting on a syncpoint previously written into the state void Execute(u32 data); - /// Write argument into the provided offset - void StateWrite(u32 offset, u32 arguments); - u32 syncpoint_value{}; - Host1xClassRegisters state{}; GPU& gpu; }; From 16392a23cc864ef0fa8a768584fbcc64fec40f2a Mon Sep 17 00:00:00 2001 From: Ameer J <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Dec 2020 13:53:28 -0500 Subject: [PATCH 3/3] remove inaccurate reference Co-authored-by: LC --- src/video_core/command_classes/sync_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp index 5798577663..19dc9e0abb 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/command_classes/sync_manager.cpp @@ -38,7 +38,7 @@ u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { } void SyncptIncrManager::SignalDone(u32 handle) { - const auto& done_incr = + const auto done_incr = std::find_if(increments.begin(), increments.end(), [handle](const SyncptIncr& incr) { return incr.id == handle; }); if (done_incr != increments.cend()) {