VideoCore: Refactor syncing.
This commit is contained in:
parent
e44ac8b821
commit
668e80a9f4
44 changed files with 648 additions and 252 deletions
|
@ -51,6 +51,7 @@
|
||||||
#include "core/telemetry_session.h"
|
#include "core/telemetry_session.h"
|
||||||
#include "core/tools/freezer.h"
|
#include "core/tools/freezer.h"
|
||||||
#include "network/network.h"
|
#include "network/network.h"
|
||||||
|
#include "video_core/host1x/host1x.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
|
||||||
|
@ -215,6 +216,7 @@ struct System::Impl {
|
||||||
|
|
||||||
telemetry_session = std::make_unique<Core::TelemetrySession>();
|
telemetry_session = std::make_unique<Core::TelemetrySession>();
|
||||||
|
|
||||||
|
host1x_core = std::make_unique<Tegra::Host1x::Host1x>();
|
||||||
gpu_core = VideoCore::CreateGPU(emu_window, system);
|
gpu_core = VideoCore::CreateGPU(emu_window, system);
|
||||||
if (!gpu_core) {
|
if (!gpu_core) {
|
||||||
return SystemResultStatus::ErrorVideoCore;
|
return SystemResultStatus::ErrorVideoCore;
|
||||||
|
@ -373,6 +375,7 @@ struct System::Impl {
|
||||||
app_loader.reset();
|
app_loader.reset();
|
||||||
audio_core.reset();
|
audio_core.reset();
|
||||||
gpu_core.reset();
|
gpu_core.reset();
|
||||||
|
host1x_core.reset();
|
||||||
perf_stats.reset();
|
perf_stats.reset();
|
||||||
kernel.Shutdown();
|
kernel.Shutdown();
|
||||||
memory.Reset();
|
memory.Reset();
|
||||||
|
@ -450,6 +453,7 @@ struct System::Impl {
|
||||||
/// AppLoader used to load the current executing application
|
/// AppLoader used to load the current executing application
|
||||||
std::unique_ptr<Loader::AppLoader> app_loader;
|
std::unique_ptr<Loader::AppLoader> app_loader;
|
||||||
std::unique_ptr<Tegra::GPU> gpu_core;
|
std::unique_ptr<Tegra::GPU> gpu_core;
|
||||||
|
std::unique_ptr<Tegra::Host1x::Host1x> host1x_core;
|
||||||
std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
|
std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
|
||||||
std::unique_ptr<Core::DeviceMemory> device_memory;
|
std::unique_ptr<Core::DeviceMemory> device_memory;
|
||||||
std::unique_ptr<AudioCore::AudioCore> audio_core;
|
std::unique_ptr<AudioCore::AudioCore> audio_core;
|
||||||
|
@ -668,6 +672,14 @@ const Tegra::GPU& System::GPU() const {
|
||||||
return *impl->gpu_core;
|
return *impl->gpu_core;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a mutable reference to the Host1x instance stored in Impl::host1x_core.
// The pointer is dereferenced without a null check, so callers must only use this
// while host1x_core is populated.
Tegra::Host1x::Host1x& System::Host1x() {
|
||||||
|
return *impl->host1x_core;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Const overload: returns an immutable reference to the Host1x instance stored in
// Impl::host1x_core. The pointer is dereferenced without a null check, so callers
// must only use this while host1x_core is populated.
const Tegra::Host1x::Host1x& System::Host1x() const {
|
||||||
|
return *impl->host1x_core;
|
||||||
|
}
|
||||||
|
|
||||||
Core::Hardware::InterruptManager& System::InterruptManager() {
|
Core::Hardware::InterruptManager& System::InterruptManager() {
|
||||||
return *impl->interrupt_manager;
|
return *impl->interrupt_manager;
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,6 +74,9 @@ class TimeManager;
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class DebugContext;
|
class DebugContext;
|
||||||
class GPU;
|
class GPU;
|
||||||
|
namespace Host1x {
|
||||||
|
class Host1x;
|
||||||
|
} // namespace Host1x
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
@ -260,6 +263,12 @@ public:
|
||||||
/// Gets an immutable reference to the GPU interface.
|
/// Gets an immutable reference to the GPU interface.
|
||||||
[[nodiscard]] const Tegra::GPU& GPU() const;
|
[[nodiscard]] const Tegra::GPU& GPU() const;
|
||||||
|
|
||||||
|
/// Gets a mutable reference to the Host1x interface.
|
||||||
|
[[nodiscard]] Tegra::Host1x::Host1x& Host1x();
|
||||||
|
|
||||||
|
/// Gets an immutable reference to the Host1x interface.
|
||||||
|
[[nodiscard]] const Tegra::Host1x::Host1x& Host1x() const;
|
||||||
|
|
||||||
/// Gets a mutable reference to the renderer.
|
/// Gets a mutable reference to the renderer.
|
||||||
[[nodiscard]] VideoCore::RendererBase& Renderer();
|
[[nodiscard]] VideoCore::RendererBase& Renderer();
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
|
||||||
stride, format, transform, crop_rect};
|
stride, format, transform, crop_rect};
|
||||||
|
|
||||||
system.GetPerfStats().EndSystemFrame();
|
system.GetPerfStats().EndSystemFrame();
|
||||||
system.GPU().SwapBuffers(&framebuffer);
|
system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0);
|
||||||
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
|
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
|
||||||
system.GetPerfStats().BeginSystemFrame();
|
system.GetPerfStats().BeginSystemFrame();
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
|
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
|
#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/host1x.h"
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
|
@ -129,7 +130,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
|
||||||
const u32 target_value = params.fence.value;
|
const u32 target_value = params.fence.value;
|
||||||
|
|
||||||
auto lock = NvEventsLock();
|
auto lock = NvEventsLock();
|
||||||
|
@ -149,7 +150,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
|
||||||
if (events[slot].fails > 2) {
|
if (events[slot].fails > 2) {
|
||||||
{
|
{
|
||||||
auto lk = system.StallProcesses();
|
auto lk = system.StallProcesses();
|
||||||
gpu.WaitFence(fence_id, target_value);
|
host1x_syncpoint_manager.WaitHost(fence_id, target_value);
|
||||||
system.UnstallProcesses();
|
system.UnstallProcesses();
|
||||||
}
|
}
|
||||||
params.value.raw = target_value;
|
params.value.raw = target_value;
|
||||||
|
@ -198,7 +199,15 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
|
||||||
}
|
}
|
||||||
params.value.raw |= slot;
|
params.value.raw |= slot;
|
||||||
|
|
||||||
gpu.RegisterSyncptInterrupt(fence_id, target_value);
|
event.wait_handle =
|
||||||
|
host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() {
|
||||||
|
auto& event = events[slot];
|
||||||
|
if (event.status.exchange(EventState::Signalling, std::memory_order_acq_rel) ==
|
||||||
|
EventState::Waiting) {
|
||||||
|
event.kevent->GetWritableEvent().Signal();
|
||||||
|
}
|
||||||
|
event.status.store(EventState::Signalled, std::memory_order_release);
|
||||||
|
});
|
||||||
return NvResult::Timeout;
|
return NvResult::Timeout;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,8 +297,10 @@ NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::v
|
||||||
auto& event = events[event_id];
|
auto& event = events[event_id];
|
||||||
if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) ==
|
if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) ==
|
||||||
EventState::Waiting) {
|
EventState::Waiting) {
|
||||||
system.GPU().CancelSyncptInterrupt(event.assigned_syncpt, event.assigned_value);
|
auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
|
||||||
|
host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle);
|
||||||
syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt);
|
syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt);
|
||||||
|
event.wait_handle = {};
|
||||||
}
|
}
|
||||||
event.fails++;
|
event.fails++;
|
||||||
event.status.store(EventState::Cancelled, std::memory_order_release);
|
event.status.store(EventState::Cancelled, std::memory_order_release);
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
||||||
#include "core/hle/service/nvdrv/nvdrv.h"
|
#include "core/hle/service/nvdrv/nvdrv.h"
|
||||||
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
|
|
||||||
namespace Service::Nvidia::NvCore {
|
namespace Service::Nvidia::NvCore {
|
||||||
class Container;
|
class Container;
|
||||||
|
@ -78,6 +79,9 @@ private:
|
||||||
// Tells if an NVEvent is registered or not
|
// Tells if an NVEvent is registered or not
|
||||||
bool registered{};
|
bool registered{};
|
||||||
|
|
||||||
|
// Used for waiting on a syncpoint & canceling it.
|
||||||
|
Tegra::Host1x::SyncpointManager::ActionHandle wait_handle{};
|
||||||
|
|
||||||
bool IsBeingUsed() {
|
bool IsBeingUsed() {
|
||||||
const auto current_status = status.load(std::memory_order_acquire);
|
const auto current_status = status.load(std::memory_order_acquire);
|
||||||
return current_status == EventState::Waiting ||
|
return current_status == EventState::Waiting ||
|
||||||
|
|
|
@ -210,10 +210,10 @@ NvResult nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::ve
|
||||||
|
|
||||||
static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
|
static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
|
||||||
return {
|
return {
|
||||||
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
|
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
|
||||||
Tegra::SubmissionMode::Increasing),
|
Tegra::SubmissionMode::Increasing),
|
||||||
{fence.value},
|
{fence.value},
|
||||||
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
|
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
|
||||||
Tegra::SubmissionMode::Increasing),
|
Tegra::SubmissionMode::Increasing),
|
||||||
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
|
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
|
||||||
};
|
};
|
||||||
|
@ -222,12 +222,12 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
|
||||||
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence,
|
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence,
|
||||||
u32 add_increment) {
|
u32 add_increment) {
|
||||||
std::vector<Tegra::CommandHeader> result{
|
std::vector<Tegra::CommandHeader> result{
|
||||||
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
|
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
|
||||||
Tegra::SubmissionMode::Increasing),
|
Tegra::SubmissionMode::Increasing),
|
||||||
{}};
|
{}};
|
||||||
|
|
||||||
for (u32 count = 0; count < add_increment; ++count) {
|
for (u32 count = 0; count < add_increment; ++count) {
|
||||||
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
|
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
|
||||||
Tegra::SubmissionMode::Increasing));
|
Tegra::SubmissionMode::Increasing));
|
||||||
result.emplace_back(
|
result.emplace_back(
|
||||||
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
|
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
|
||||||
|
@ -239,7 +239,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
|
||||||
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence,
|
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence,
|
||||||
u32 add_increment) {
|
u32 add_increment) {
|
||||||
std::vector<Tegra::CommandHeader> result{
|
std::vector<Tegra::CommandHeader> result{
|
||||||
Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
|
Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
|
||||||
Tegra::SubmissionMode::Increasing),
|
Tegra::SubmissionMode::Increasing),
|
||||||
{}};
|
{}};
|
||||||
const std::vector<Tegra::CommandHeader> increment{
|
const std::vector<Tegra::CommandHeader> increment{
|
||||||
|
|
|
@ -24,6 +24,8 @@
|
||||||
#include "core/hle/service/vi/layer/vi_layer.h"
|
#include "core/hle/service/vi/layer/vi_layer.h"
|
||||||
#include "core/hle/service/vi/vi_results.h"
|
#include "core/hle/service/vi/vi_results.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/host1x.h"
|
||||||
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
|
|
||||||
namespace Service::NVFlinger {
|
namespace Service::NVFlinger {
|
||||||
|
|
||||||
|
@ -267,12 +269,12 @@ void NVFlinger::Compose() {
|
||||||
return; // We are likely shutting down
|
return; // We are likely shutting down
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
|
||||||
const auto& multi_fence = buffer.fence;
|
const auto& multi_fence = buffer.fence;
|
||||||
guard->unlock();
|
guard->unlock();
|
||||||
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
|
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
|
||||||
const auto& fence = multi_fence.fences[fence_id];
|
const auto& fence = multi_fence.fences[fence_id];
|
||||||
gpu.WaitFence(fence.id, fence.value);
|
syncpoint_manager.WaitGuest(fence.id, fence.value);
|
||||||
}
|
}
|
||||||
guard->lock();
|
guard->lock();
|
||||||
|
|
||||||
|
@ -284,6 +286,7 @@ void NVFlinger::Compose() {
|
||||||
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd);
|
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd);
|
||||||
ASSERT(nvdisp);
|
ASSERT(nvdisp);
|
||||||
|
|
||||||
|
guard->unlock();
|
||||||
Common::Rectangle<int> crop_rect{
|
Common::Rectangle<int> crop_rect{
|
||||||
static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()),
|
static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()),
|
||||||
static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())};
|
static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())};
|
||||||
|
@ -292,6 +295,8 @@ void NVFlinger::Compose() {
|
||||||
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
|
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
|
||||||
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
|
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
|
||||||
|
|
||||||
|
guard->lock();
|
||||||
|
|
||||||
swap_interval = buffer.swap_interval;
|
swap_interval = buffer.swap_interval;
|
||||||
|
|
||||||
auto fence = android::Fence::NoFence();
|
auto fence = android::Fence::NoFence();
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
add_subdirectory(host_shaders)
|
add_subdirectory(host_shaders)
|
||||||
|
|
||||||
if(LIBVA_FOUND)
|
if(LIBVA_FOUND)
|
||||||
set_source_files_properties(command_classes/codecs/codec.cpp
|
set_source_files_properties(host1x/codecs/codec.cpp
|
||||||
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
|
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
|
||||||
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
|
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
|
@ -15,24 +15,6 @@ add_library(video_core STATIC
|
||||||
buffer_cache/buffer_cache.h
|
buffer_cache/buffer_cache.h
|
||||||
cdma_pusher.cpp
|
cdma_pusher.cpp
|
||||||
cdma_pusher.h
|
cdma_pusher.h
|
||||||
command_classes/codecs/codec.cpp
|
|
||||||
command_classes/codecs/codec.h
|
|
||||||
command_classes/codecs/h264.cpp
|
|
||||||
command_classes/codecs/h264.h
|
|
||||||
command_classes/codecs/vp8.cpp
|
|
||||||
command_classes/codecs/vp8.h
|
|
||||||
command_classes/codecs/vp9.cpp
|
|
||||||
command_classes/codecs/vp9.h
|
|
||||||
command_classes/codecs/vp9_types.h
|
|
||||||
command_classes/host1x.cpp
|
|
||||||
command_classes/host1x.h
|
|
||||||
command_classes/nvdec.cpp
|
|
||||||
command_classes/nvdec.h
|
|
||||||
command_classes/nvdec_common.h
|
|
||||||
command_classes/sync_manager.cpp
|
|
||||||
command_classes/sync_manager.h
|
|
||||||
command_classes/vic.cpp
|
|
||||||
command_classes/vic.h
|
|
||||||
compatible_formats.cpp
|
compatible_formats.cpp
|
||||||
compatible_formats.h
|
compatible_formats.h
|
||||||
control/channel_state.cpp
|
control/channel_state.cpp
|
||||||
|
@ -63,6 +45,26 @@ add_library(video_core STATIC
|
||||||
engines/puller.cpp
|
engines/puller.cpp
|
||||||
engines/puller.h
|
engines/puller.h
|
||||||
framebuffer_config.h
|
framebuffer_config.h
|
||||||
|
host1x/codecs/codec.cpp
|
||||||
|
host1x/codecs/codec.h
|
||||||
|
host1x/codecs/h264.cpp
|
||||||
|
host1x/codecs/h264.h
|
||||||
|
host1x/codecs/vp8.cpp
|
||||||
|
host1x/codecs/vp8.h
|
||||||
|
host1x/codecs/vp9.cpp
|
||||||
|
host1x/codecs/vp9.h
|
||||||
|
host1x/codecs/vp9_types.h
|
||||||
|
host1x/control.cpp
|
||||||
|
host1x/control.h
|
||||||
|
host1x/nvdec.cpp
|
||||||
|
host1x/nvdec.h
|
||||||
|
host1x/nvdec_common.h
|
||||||
|
host1x/sync_manager.cpp
|
||||||
|
host1x/sync_manager.h
|
||||||
|
host1x/syncpoint_manager.cpp
|
||||||
|
host1x/syncpoint_manager.h
|
||||||
|
host1x/vic.cpp
|
||||||
|
host1x/vic.h
|
||||||
macro/macro.cpp
|
macro/macro.cpp
|
||||||
macro/macro.h
|
macro/macro.h
|
||||||
macro/macro_hle.cpp
|
macro/macro_hle.cpp
|
||||||
|
|
|
@ -2,20 +2,22 @@
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
#include <bit>
|
#include <bit>
|
||||||
#include "command_classes/host1x.h"
|
|
||||||
#include "command_classes/nvdec.h"
|
|
||||||
#include "command_classes/vic.h"
|
|
||||||
#include "video_core/cdma_pusher.h"
|
#include "video_core/cdma_pusher.h"
|
||||||
#include "video_core/command_classes/sync_manager.h"
|
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/control.h"
|
||||||
|
#include "video_core/host1x/nvdec.h"
|
||||||
|
#include "video_core/host1x/nvdec_common.h"
|
||||||
|
#include "video_core/host1x/sync_manager.h"
|
||||||
|
#include "video_core/host1x/vic.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
CDmaPusher::CDmaPusher(GPU& gpu_)
|
CDmaPusher::CDmaPusher(GPU& gpu_)
|
||||||
: gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
|
: gpu{gpu_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(gpu)),
|
||||||
vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
|
vic_processor(std::make_unique<Host1x::Vic>(gpu, nvdec_processor)),
|
||||||
host1x_processor(std::make_unique<Host1x>(gpu)),
|
host1x_processor(std::make_unique<Host1x::Control>(gpu)),
|
||||||
sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
|
sync_manager(std::make_unique<Host1x::SyncptIncrManager>(gpu)) {}
|
||||||
|
|
||||||
CDmaPusher::~CDmaPusher() = default;
|
CDmaPusher::~CDmaPusher() = default;
|
||||||
|
|
||||||
|
@ -109,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
|
||||||
case ThiMethod::SetMethod1:
|
case ThiMethod::SetMethod1:
|
||||||
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
|
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
|
||||||
static_cast<u32>(vic_thi_state.method_0), data);
|
static_cast<u32>(vic_thi_state.method_0), data);
|
||||||
vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data);
|
vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
|
||||||
|
data);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case ChClassId::Host1x:
|
case ChClassId::Control:
|
||||||
// This device is mainly for syncpoint synchronization
|
// This device is mainly for syncpoint synchronization
|
||||||
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
|
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
|
||||||
host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data);
|
host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
|
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
|
||||||
|
|
|
@ -13,10 +13,13 @@
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
class GPU;
|
class GPU;
|
||||||
class Host1x;
|
|
||||||
|
namespace Host1x {
|
||||||
|
class Control;
|
||||||
class Nvdec;
|
class Nvdec;
|
||||||
class SyncptIncrManager;
|
class SyncptIncrManager;
|
||||||
class Vic;
|
class Vic;
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
enum class ChSubmissionMode : u32 {
|
enum class ChSubmissionMode : u32 {
|
||||||
SetClass = 0,
|
SetClass = 0,
|
||||||
|
@ -30,7 +33,7 @@ enum class ChSubmissionMode : u32 {
|
||||||
|
|
||||||
enum class ChClassId : u32 {
|
enum class ChClassId : u32 {
|
||||||
NoClass = 0x0,
|
NoClass = 0x0,
|
||||||
Host1x = 0x1,
|
Control = 0x1,
|
||||||
VideoEncodeMpeg = 0x20,
|
VideoEncodeMpeg = 0x20,
|
||||||
VideoEncodeNvEnc = 0x21,
|
VideoEncodeNvEnc = 0x21,
|
||||||
VideoStreamingVi = 0x30,
|
VideoStreamingVi = 0x30,
|
||||||
|
@ -102,10 +105,10 @@ private:
|
||||||
void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
|
void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
|
||||||
|
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
|
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
|
||||||
std::unique_ptr<Tegra::Vic> vic_processor;
|
std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
|
||||||
std::unique_ptr<Tegra::Host1x> host1x_processor;
|
std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
|
||||||
std::unique_ptr<SyncptIncrManager> sync_manager;
|
std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
|
||||||
ChClassId current_class{};
|
ChClassId current_class{};
|
||||||
ThiRegisters vic_thi_state{};
|
ThiRegisters vic_thi_state{};
|
||||||
ThiRegisters nvdec_thi_state{};
|
ThiRegisters nvdec_thi_state{};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Copyright 2021 yuzu Emulator Project
|
// Copyright 2021 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv3 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Copyright 2021 yuzu Emulator Project
|
// Copyright 2021 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv3 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv3 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <deque>
|
#include <deque>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Copyright 2021 yuzu Emulator Project
|
// Copyright 2021 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv3 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// Copyright 2021 yuzu Emulator Project
|
// Copyright 2021 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv3 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
|
@ -37,24 +37,32 @@ enum class SubmissionMode : u32 {
|
||||||
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
||||||
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
|
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
|
||||||
// So the values you see in docs might be multiplied by 4.
|
// So the values you see in docs might be multiplied by 4.
|
||||||
|
// Register documentation:
|
||||||
|
// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/cla26f.h
|
||||||
|
//
|
||||||
|
// Register Description (approx):
|
||||||
|
// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
|
||||||
enum class BufferMethods : u32 {
|
enum class BufferMethods : u32 {
|
||||||
BindObject = 0x0,
|
BindObject = 0x0,
|
||||||
|
Illegal = 0x1,
|
||||||
Nop = 0x2,
|
Nop = 0x2,
|
||||||
SemaphoreAddressHigh = 0x4,
|
SemaphoreAddressHigh = 0x4,
|
||||||
SemaphoreAddressLow = 0x5,
|
SemaphoreAddressLow = 0x5,
|
||||||
SemaphoreSequence = 0x6,
|
SemaphoreSequencePayload = 0x6,
|
||||||
SemaphoreTrigger = 0x7,
|
SemaphoreOperation = 0x7,
|
||||||
NotifyIntr = 0x8,
|
NonStallInterrupt = 0x8,
|
||||||
WrcacheFlush = 0x9,
|
WrcacheFlush = 0x9,
|
||||||
Unk28 = 0xA,
|
MemOpA = 0xA,
|
||||||
UnkCacheFlush = 0xB,
|
MemOpB = 0xB,
|
||||||
|
MemOpC = 0xC,
|
||||||
|
MemOpD = 0xD,
|
||||||
RefCnt = 0x14,
|
RefCnt = 0x14,
|
||||||
SemaphoreAcquire = 0x1A,
|
SemaphoreAcquire = 0x1A,
|
||||||
SemaphoreRelease = 0x1B,
|
SemaphoreRelease = 0x1B,
|
||||||
FenceValue = 0x1C,
|
SyncpointPayload = 0x1C,
|
||||||
FenceAction = 0x1D,
|
SyncpointOperation = 0x1D,
|
||||||
WaitForInterrupt = 0x1E,
|
WaitForIdle = 0x1E,
|
||||||
Unk7c = 0x1F,
|
CRCCheck = 0x1F,
|
||||||
Yield = 0x20,
|
Yield = 0x20,
|
||||||
NonPullerMethods = 0x40,
|
NonPullerMethods = 0x40,
|
||||||
};
|
};
|
||||||
|
|
|
@ -68,11 +68,6 @@ void Puller::ProcessFenceActionMethod() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Puller::ProcessWaitForInterruptMethod() {
|
|
||||||
// TODO(bunnei) ImplementMe
|
|
||||||
LOG_WARNING(HW_GPU, "(STUBBED) called");
|
|
||||||
}
|
|
||||||
|
|
||||||
void Puller::ProcessSemaphoreTriggerMethod() {
|
void Puller::ProcessSemaphoreTriggerMethod() {
|
||||||
const auto semaphoreOperationMask = 0xF;
|
const auto semaphoreOperationMask = 0xF;
|
||||||
const auto op =
|
const auto op =
|
||||||
|
@ -91,29 +86,33 @@ void Puller::ProcessSemaphoreTriggerMethod() {
|
||||||
block.timestamp = gpu.GetTicks();
|
block.timestamp = gpu.GetTicks();
|
||||||
memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
|
memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
|
||||||
} else {
|
} else {
|
||||||
|
do {
|
||||||
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
||||||
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
|
|
||||||
(op == GpuSemaphoreOperation::AcquireGequal &&
|
|
||||||
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
|
|
||||||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
|
|
||||||
// Nothing to do in this case
|
|
||||||
} else {
|
|
||||||
regs.acquire_source = true;
|
regs.acquire_source = true;
|
||||||
regs.acquire_value = regs.semaphore_sequence;
|
regs.acquire_value = regs.semaphore_sequence;
|
||||||
if (op == GpuSemaphoreOperation::AcquireEqual) {
|
if (op == GpuSemaphoreOperation::AcquireEqual) {
|
||||||
regs.acquire_active = true;
|
regs.acquire_active = true;
|
||||||
regs.acquire_mode = false;
|
regs.acquire_mode = false;
|
||||||
|
if (word != regs.acquire_value) {
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
|
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
|
||||||
regs.acquire_active = true;
|
regs.acquire_active = true;
|
||||||
regs.acquire_mode = true;
|
regs.acquire_mode = true;
|
||||||
|
if (word < regs.acquire_value) {
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
} else if (op == GpuSemaphoreOperation::AcquireMask) {
|
} else if (op == GpuSemaphoreOperation::AcquireMask) {
|
||||||
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
|
// AcquireMask waits for a value that, ANDed with semaphore_sequence, is non-zero.
// The mask must be parenthesised: `==` binds tighter than `&`, so the unparenthesised
// form evaluated `word & (regs.semaphore_sequence == 0)` instead.
if ((word & regs.semaphore_sequence) == 0) {
|
||||||
// semaphore_sequence, gives a non-0 result
|
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
|
continue;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
|
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
|
||||||
}
|
}
|
||||||
}
|
} while (false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,6 +123,7 @@ void Puller::ProcessSemaphoreRelease() {
|
||||||
void Puller::ProcessSemaphoreAcquire() {
|
void Puller::ProcessSemaphoreAcquire() {
|
||||||
const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||||
const auto value = regs.semaphore_acquire;
|
const auto value = regs.semaphore_acquire;
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(5));
|
||||||
if (word != value) {
|
if (word != value) {
|
||||||
regs.acquire_active = true;
|
regs.acquire_active = true;
|
||||||
regs.acquire_value = value;
|
regs.acquire_value = value;
|
||||||
|
@ -146,32 +146,39 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
|
||||||
case BufferMethods::Nop:
|
case BufferMethods::Nop:
|
||||||
case BufferMethods::SemaphoreAddressHigh:
|
case BufferMethods::SemaphoreAddressHigh:
|
||||||
case BufferMethods::SemaphoreAddressLow:
|
case BufferMethods::SemaphoreAddressLow:
|
||||||
case BufferMethods::SemaphoreSequence:
|
case BufferMethods::SemaphoreSequencePayload:
|
||||||
case BufferMethods::UnkCacheFlush:
|
|
||||||
case BufferMethods::WrcacheFlush:
|
case BufferMethods::WrcacheFlush:
|
||||||
case BufferMethods::FenceValue:
|
case BufferMethods::SyncpointPayload:
|
||||||
break;
|
break;
|
||||||
case BufferMethods::RefCnt:
|
case BufferMethods::RefCnt:
|
||||||
rasterizer->SignalReference();
|
rasterizer->SignalReference();
|
||||||
break;
|
break;
|
||||||
case BufferMethods::FenceAction:
|
case BufferMethods::SyncpointOperation:
|
||||||
ProcessFenceActionMethod();
|
ProcessFenceActionMethod();
|
||||||
break;
|
break;
|
||||||
case BufferMethods::WaitForInterrupt:
|
case BufferMethods::WaitForIdle:
|
||||||
ProcessWaitForInterruptMethod();
|
rasterizer->WaitForIdle();
|
||||||
break;
|
break;
|
||||||
case BufferMethods::SemaphoreTrigger: {
|
case BufferMethods::SemaphoreOperation: {
|
||||||
ProcessSemaphoreTriggerMethod();
|
ProcessSemaphoreTriggerMethod();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BufferMethods::NotifyIntr: {
|
case BufferMethods::NonStallInterrupt: {
|
||||||
// TODO(Kmather73): Research and implement this method.
|
LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
|
||||||
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BufferMethods::Unk28: {
|
case BufferMethods::MemOpA: {
|
||||||
// TODO(Kmather73): Research and implement this method.
|
LOG_ERROR(HW_GPU, "Memory Operation A");
|
||||||
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
|
break;
|
||||||
|
}
|
||||||
|
case BufferMethods::MemOpB: {
|
||||||
|
// Implement this better.
|
||||||
|
rasterizer->SyncGuestHost();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case BufferMethods::MemOpC:
|
||||||
|
case BufferMethods::MemOpD: {
|
||||||
|
LOG_ERROR(HW_GPU, "Memory Operation C,D");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BufferMethods::SemaphoreAcquire: {
|
case BufferMethods::SemaphoreAcquire: {
|
||||||
|
|
|
@ -141,7 +141,6 @@ private:
|
||||||
void ProcessSemaphoreAcquire();
|
void ProcessSemaphoreAcquire();
|
||||||
void ProcessSemaphoreRelease();
|
void ProcessSemaphoreRelease();
|
||||||
void ProcessSemaphoreTriggerMethod();
|
void ProcessSemaphoreTriggerMethod();
|
||||||
void ProcessWaitForInterruptMethod();
|
|
||||||
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
|
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
|
||||||
|
|
||||||
/// Mapping of command subchannels to their bound engine ids
|
/// Mapping of command subchannels to their bound engine ids
|
||||||
|
|
|
@ -11,6 +11,8 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/delayed_destruction_ring.h"
|
#include "video_core/delayed_destruction_ring.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/host1x.h"
|
||||||
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
namespace VideoCommon {
|
namespace VideoCommon {
|
||||||
|
@ -72,6 +74,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void SignalSyncPoint(u32 value) {
|
void SignalSyncPoint(u32 value) {
|
||||||
|
syncpoint_manager.IncrementGuest(value);
|
||||||
TryReleasePendingFences();
|
TryReleasePendingFences();
|
||||||
const bool should_flush = ShouldFlush();
|
const bool should_flush = ShouldFlush();
|
||||||
CommitAsyncFlushes();
|
CommitAsyncFlushes();
|
||||||
|
@ -96,7 +99,7 @@ public:
|
||||||
auto payload = current_fence->GetPayload();
|
auto payload = current_fence->GetPayload();
|
||||||
std::memcpy(address, &payload, sizeof(payload));
|
std::memcpy(address, &payload, sizeof(payload));
|
||||||
} else {
|
} else {
|
||||||
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
syncpoint_manager.IncrementHost(current_fence->GetPayload());
|
||||||
}
|
}
|
||||||
PopFence();
|
PopFence();
|
||||||
}
|
}
|
||||||
|
@ -106,8 +109,8 @@ protected:
|
||||||
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||||
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
|
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
|
||||||
TQueryCache& query_cache_)
|
TQueryCache& query_cache_)
|
||||||
: rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
|
: rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
|
||||||
buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
|
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
|
||||||
|
|
||||||
virtual ~FenceManager() = default;
|
virtual ~FenceManager() = default;
|
||||||
|
|
||||||
|
@ -125,6 +128,7 @@ protected:
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
Tegra::GPU& gpu;
|
Tegra::GPU& gpu;
|
||||||
|
Tegra::Host1x::SyncpointManager& syncpoint_manager;
|
||||||
TTextureCache& texture_cache;
|
TTextureCache& texture_cache;
|
||||||
TTBufferCache& buffer_cache;
|
TTBufferCache& buffer_cache;
|
||||||
TQueryCache& query_cache;
|
TQueryCache& query_cache;
|
||||||
|
@ -142,7 +146,7 @@ private:
|
||||||
const auto payload = current_fence->GetPayload();
|
const auto payload = current_fence->GetPayload();
|
||||||
std::memcpy(address, &payload, sizeof(payload));
|
std::memcpy(address, &payload, sizeof(payload));
|
||||||
} else {
|
} else {
|
||||||
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
syncpoint_manager.IncrementHost(current_fence->GetPayload());
|
||||||
}
|
}
|
||||||
PopFence();
|
PopFence();
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,6 +28,8 @@
|
||||||
#include "video_core/engines/maxwell_dma.h"
|
#include "video_core/engines/maxwell_dma.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/gpu_thread.h"
|
#include "video_core/gpu_thread.h"
|
||||||
|
#include "video_core/host1x/host1x.h"
|
||||||
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/shader_notify.h"
|
#include "video_core/shader_notify.h"
|
||||||
|
@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
|
||||||
|
|
||||||
struct GPU::Impl {
|
struct GPU::Impl {
|
||||||
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
|
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
|
||||||
: gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
|
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
|
||||||
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
|
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
|
||||||
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
|
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
|
||||||
|
|
||||||
|
@ -115,31 +117,35 @@ struct GPU::Impl {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Request a host GPU memory flush from the CPU.
|
/// Request a host GPU memory flush from the CPU.
|
||||||
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
|
template <typename Func>
|
||||||
std::unique_lock lck{flush_request_mutex};
|
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
|
||||||
const u64 fence = ++last_flush_fence;
|
std::unique_lock lck{sync_request_mutex};
|
||||||
flush_requests.emplace_back(fence, addr, size);
|
const u64 fence = ++last_sync_fence;
|
||||||
|
sync_requests.emplace_back(action);
|
||||||
return fence;
|
return fence;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Obtains current flush request fence id.
|
/// Obtains current flush request fence id.
|
||||||
[[nodiscard]] u64 CurrentFlushRequestFence() const {
|
[[nodiscard]] u64 CurrentSyncRequestFence() const {
|
||||||
return current_flush_fence.load(std::memory_order_relaxed);
|
return current_sync_fence.load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
void WaitForSyncOperation(const u64 fence) {
|
||||||
|
std::unique_lock lck{sync_request_mutex};
|
||||||
|
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tick pending requests within the GPU.
|
/// Tick pending requests within the GPU.
|
||||||
void TickWork() {
|
void TickWork() {
|
||||||
std::unique_lock lck{flush_request_mutex};
|
std::unique_lock lck{sync_request_mutex};
|
||||||
while (!flush_requests.empty()) {
|
while (!sync_requests.empty()) {
|
||||||
auto& request = flush_requests.front();
|
auto request = std::move(sync_requests.front());
|
||||||
const u64 fence = request.fence;
|
sync_requests.pop_front();
|
||||||
const VAddr addr = request.addr;
|
sync_request_mutex.unlock();
|
||||||
const std::size_t size = request.size;
|
request();
|
||||||
flush_requests.pop_front();
|
current_sync_fence.fetch_add(1, std::memory_order_release);
|
||||||
flush_request_mutex.unlock();
|
sync_request_mutex.lock();
|
||||||
rasterizer->FlushRegion(addr, size);
|
sync_request_cv.notify_all();
|
||||||
current_flush_fence.store(fence);
|
|
||||||
flush_request_mutex.lock();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,78 +213,26 @@ struct GPU::Impl {
|
||||||
|
|
||||||
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
|
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
|
||||||
void WaitFence(u32 syncpoint_id, u32 value) {
|
void WaitFence(u32 syncpoint_id, u32 value) {
|
||||||
// Synced GPU, is always in sync
|
|
||||||
if (!is_async) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (syncpoint_id == UINT32_MAX) {
|
if (syncpoint_id == UINT32_MAX) {
|
||||||
// TODO: Research what this does.
|
|
||||||
LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
MICROPROFILE_SCOPE(GPU_wait);
|
MICROPROFILE_SCOPE(GPU_wait);
|
||||||
std::unique_lock lock{sync_mutex};
|
host1x.GetSyncpointManager().WaitHost(syncpoint_id, value);
|
||||||
sync_cv.wait(lock, [=, this] {
|
|
||||||
if (shutting_down.load(std::memory_order_relaxed)) {
|
|
||||||
// We're shutting down, ensure no threads continue to wait for the next syncpoint
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return syncpoints.at(syncpoint_id).load() >= value;
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void IncrementSyncPoint(u32 syncpoint_id) {
|
void IncrementSyncPoint(u32 syncpoint_id) {
|
||||||
auto& syncpoint = syncpoints.at(syncpoint_id);
|
host1x.GetSyncpointManager().IncrementHost(syncpoint_id);
|
||||||
syncpoint++;
|
|
||||||
std::scoped_lock lock{sync_mutex};
|
|
||||||
sync_cv.notify_all();
|
|
||||||
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
|
|
||||||
if (!interrupt.empty()) {
|
|
||||||
u32 value = syncpoint.load();
|
|
||||||
auto it = interrupt.begin();
|
|
||||||
while (it != interrupt.end()) {
|
|
||||||
if (value >= *it) {
|
|
||||||
TriggerCpuInterrupt(syncpoint_id, *it);
|
|
||||||
it = interrupt.erase(it);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
it++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
|
[[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
|
||||||
return syncpoints.at(syncpoint_id).load();
|
return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
|
void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
|
||||||
std::scoped_lock lock{sync_mutex};
|
auto& syncpoint_manager = host1x.GetSyncpointManager();
|
||||||
u32 current_value = syncpoints.at(syncpoint_id).load();
|
syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() {
|
||||||
if ((static_cast<s32>(current_value) - static_cast<s32>(value)) >= 0) {
|
|
||||||
TriggerCpuInterrupt(syncpoint_id, value);
|
TriggerCpuInterrupt(syncpoint_id, value);
|
||||||
return;
|
});
|
||||||
}
|
|
||||||
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
|
|
||||||
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
|
|
||||||
[value](u32 in_value) { return in_value == value; });
|
|
||||||
if (contains) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
interrupt.emplace_back(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
|
|
||||||
std::scoped_lock lock{sync_mutex};
|
|
||||||
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
|
|
||||||
const auto iter =
|
|
||||||
std::find_if(interrupt.begin(), interrupt.end(),
|
|
||||||
[value](u32 interrupt_value) { return value == interrupt_value; });
|
|
||||||
|
|
||||||
if (iter == interrupt.end()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
interrupt.erase(iter);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] u64 GetTicks() const {
|
[[nodiscard]] u64 GetTicks() const {
|
||||||
|
@ -387,8 +341,48 @@ struct GPU::Impl {
|
||||||
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
|
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
|
||||||
|
Service::Nvidia::NvFence* fences, size_t num_fences) {
|
||||||
|
size_t current_request_counter{};
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lk(request_swap_mutex);
|
||||||
|
if (free_swap_counters.empty()) {
|
||||||
|
current_request_counter = request_swap_counters.size();
|
||||||
|
request_swap_counters.emplace_back(num_fences);
|
||||||
|
} else {
|
||||||
|
current_request_counter = free_swap_counters.front();
|
||||||
|
request_swap_counters[current_request_counter] = num_fences;
|
||||||
|
free_swap_counters.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const auto wait_fence =
|
||||||
|
RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] {
|
||||||
|
auto& syncpoint_manager = host1x.GetSyncpointManager();
|
||||||
|
if (num_fences == 0) {
|
||||||
|
renderer->SwapBuffers(framebuffer);
|
||||||
|
}
|
||||||
|
const auto executer = [this, current_request_counter,
|
||||||
|
framebuffer_copy = *framebuffer]() {
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lk(request_swap_mutex);
|
||||||
|
if (--request_swap_counters[current_request_counter] != 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
free_swap_counters.push_back(current_request_counter);
|
||||||
|
}
|
||||||
|
renderer->SwapBuffers(&framebuffer_copy);
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < num_fences; i++) {
|
||||||
|
syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
gpu_thread.TickGPU();
|
||||||
|
WaitForSyncOperation(wait_fence);
|
||||||
|
}
|
||||||
|
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
Host1x::Host1x& host1x;
|
||||||
|
|
||||||
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
|
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
|
||||||
std::unique_ptr<VideoCore::RendererBase> renderer;
|
std::unique_ptr<VideoCore::RendererBase> renderer;
|
||||||
|
@ -411,18 +405,11 @@ struct GPU::Impl {
|
||||||
|
|
||||||
std::condition_variable sync_cv;
|
std::condition_variable sync_cv;
|
||||||
|
|
||||||
struct FlushRequest {
|
std::list<std::function<void(void)>> sync_requests;
|
||||||
explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
|
std::atomic<u64> current_sync_fence{};
|
||||||
: fence{fence_}, addr{addr_}, size{size_} {}
|
u64 last_sync_fence{};
|
||||||
u64 fence;
|
std::mutex sync_request_mutex;
|
||||||
VAddr addr;
|
std::condition_variable sync_request_cv;
|
||||||
std::size_t size;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::list<FlushRequest> flush_requests;
|
|
||||||
std::atomic<u64> current_flush_fence{};
|
|
||||||
u64 last_flush_fence{};
|
|
||||||
std::mutex flush_request_mutex;
|
|
||||||
|
|
||||||
const bool is_async;
|
const bool is_async;
|
||||||
|
|
||||||
|
@ -433,6 +420,10 @@ struct GPU::Impl {
|
||||||
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
|
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
|
||||||
Tegra::Control::ChannelState* current_channel;
|
Tegra::Control::ChannelState* current_channel;
|
||||||
s32 bound_channel{-1};
|
s32 bound_channel{-1};
|
||||||
|
|
||||||
|
std::deque<size_t> free_swap_counters;
|
||||||
|
std::deque<size_t> request_swap_counters;
|
||||||
|
std::mutex request_swap_mutex;
|
||||||
};
|
};
|
||||||
|
|
||||||
GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
|
GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
|
||||||
|
@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() {
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
|
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
|
||||||
return impl->RequestFlush(addr, size);
|
return impl->RequestSyncOperation(
|
||||||
|
[this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 GPU::CurrentFlushRequestFence() const {
|
u64 GPU::CurrentSyncRequestFence() const {
|
||||||
return impl->CurrentFlushRequestFence();
|
return impl->CurrentSyncRequestFence();
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPU::WaitForSyncOperation(u64 fence) {
|
||||||
|
return impl->WaitForSyncOperation(fence);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU::TickWork() {
|
void GPU::TickWork() {
|
||||||
impl->TickWork();
|
impl->TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Gets a mutable reference to the Host1x interface
|
||||||
|
Host1x::Host1x& GPU::Host1x() {
|
||||||
|
return impl->host1x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets an immutable reference to the Host1x interface.
|
||||||
|
const Host1x::Host1x& GPU::Host1x() const {
|
||||||
|
return impl->host1x;
|
||||||
|
}
|
||||||
|
|
||||||
Engines::Maxwell3D& GPU::Maxwell3D() {
|
Engines::Maxwell3D& GPU::Maxwell3D() {
|
||||||
return impl->Maxwell3D();
|
return impl->Maxwell3D();
|
||||||
}
|
}
|
||||||
|
@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
|
||||||
return impl->ShaderNotify();
|
return impl->ShaderNotify();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
|
||||||
|
Service::Nvidia::NvFence* fences, size_t num_fences) {
|
||||||
|
impl->RequestSwapBuffers(framebuffer, fences, num_fences);
|
||||||
|
}
|
||||||
|
|
||||||
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
|
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
|
||||||
impl->WaitFence(syncpoint_id, value);
|
impl->WaitFence(syncpoint_id, value);
|
||||||
}
|
}
|
||||||
|
@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
|
||||||
impl->RegisterSyncptInterrupt(syncpoint_id, value);
|
impl->RegisterSyncptInterrupt(syncpoint_id, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
|
|
||||||
return impl->CancelSyncptInterrupt(syncpoint_id, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 GPU::GetTicks() const {
|
u64 GPU::GetTicks() const {
|
||||||
return impl->GetTicks();
|
return impl->GetTicks();
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,10 @@ namespace Control {
|
||||||
struct ChannelState;
|
struct ChannelState;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
class Host1x;
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
|
|
||||||
class GPU final {
|
class GPU final {
|
||||||
|
@ -124,11 +128,19 @@ public:
|
||||||
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
|
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
|
||||||
|
|
||||||
/// Obtains current flush request fence id.
|
/// Obtains current flush request fence id.
|
||||||
[[nodiscard]] u64 CurrentFlushRequestFence() const;
|
[[nodiscard]] u64 CurrentSyncRequestFence() const;
|
||||||
|
|
||||||
|
void WaitForSyncOperation(u64 fence);
|
||||||
|
|
||||||
/// Tick pending requests within the GPU.
|
/// Tick pending requests within the GPU.
|
||||||
void TickWork();
|
void TickWork();
|
||||||
|
|
||||||
|
/// Gets a mutable reference to the Host1x interface
|
||||||
|
[[nodiscard]] Host1x::Host1x& Host1x();
|
||||||
|
|
||||||
|
/// Gets an immutable reference to the Host1x interface.
|
||||||
|
[[nodiscard]] const Host1x::Host1x& Host1x() const;
|
||||||
|
|
||||||
/// Returns a reference to the Maxwell3D GPU engine.
|
/// Returns a reference to the Maxwell3D GPU engine.
|
||||||
[[nodiscard]] Engines::Maxwell3D& Maxwell3D();
|
[[nodiscard]] Engines::Maxwell3D& Maxwell3D();
|
||||||
|
|
||||||
|
@ -174,8 +186,6 @@ public:
|
||||||
|
|
||||||
void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
|
void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
|
||||||
|
|
||||||
bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
|
|
||||||
|
|
||||||
[[nodiscard]] u64 GetTicks() const;
|
[[nodiscard]] u64 GetTicks() const;
|
||||||
|
|
||||||
[[nodiscard]] bool IsAsync() const;
|
[[nodiscard]] bool IsAsync() const;
|
||||||
|
@ -184,6 +194,9 @@ public:
|
||||||
|
|
||||||
void RendererFrameEndNotify();
|
void RendererFrameEndNotify();
|
||||||
|
|
||||||
|
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
|
||||||
|
Service::Nvidia::NvFence* fences, size_t num_fences);
|
||||||
|
|
||||||
/// Performs any additional setup necessary in order to begin GPU emulation.
|
/// Performs any additional setup necessary in order to begin GPU emulation.
|
||||||
/// This can be used to launch any necessary threads and register any necessary
|
/// This can be used to launch any necessary threads and register any necessary
|
||||||
/// core timing events.
|
/// core timing events.
|
||||||
|
|
|
@ -93,8 +93,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||||
}
|
}
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
u64 fence = gpu.RequestFlush(addr, size);
|
u64 fence = gpu.RequestFlush(addr, size);
|
||||||
|
TickGPU();
|
||||||
|
gpu.WaitForSyncOperation(fence);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ThreadManager::TickGPU() {
|
||||||
PushCommand(GPUTickCommand(), true);
|
PushCommand(GPUTickCommand(), true);
|
||||||
ASSERT(fence <= gpu.CurrentFlushRequestFence());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
|
|
|
@ -135,6 +135,8 @@ public:
|
||||||
|
|
||||||
void OnCommandListEnd();
|
void OnCommandListEnd();
|
||||||
|
|
||||||
|
void TickGPU();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Pushes a command to be executed by the GPU thread
|
/// Pushes a command to be executed by the GPU thread
|
||||||
u64 PushCommand(CommandData&& command_data, bool block = false);
|
u64 PushCommand(CommandData&& command_data, bool block = false);
|
||||||
|
|
|
@ -6,11 +6,11 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
#include "video_core/command_classes/codecs/codec.h"
|
|
||||||
#include "video_core/command_classes/codecs/h264.h"
|
|
||||||
#include "video_core/command_classes/codecs/vp8.h"
|
|
||||||
#include "video_core/command_classes/codecs/vp9.h"
|
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/codecs/codec.h"
|
||||||
|
#include "video_core/host1x/codecs/h264.h"
|
||||||
|
#include "video_core/host1x/codecs/vp8.h"
|
||||||
|
#include "video_core/host1x/codecs/vp9.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -73,7 +73,7 @@ void AVFrameDeleter(AVFrame* ptr) {
|
||||||
av_frame_free(&ptr);
|
av_frame_free(&ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
|
Codec::Codec(GPU& gpu_, const Host1x::NvdecCommon::NvdecRegisters& regs)
|
||||||
: gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
|
: gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
|
||||||
vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
|
vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
|
||||||
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
|
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
|
||||||
|
@ -168,11 +168,11 @@ void Codec::InitializeGpuDecoder() {
|
||||||
void Codec::Initialize() {
|
void Codec::Initialize() {
|
||||||
const AVCodecID codec = [&] {
|
const AVCodecID codec = [&] {
|
||||||
switch (current_codec) {
|
switch (current_codec) {
|
||||||
case NvdecCommon::VideoCodec::H264:
|
case Host1x::NvdecCommon::VideoCodec::H264:
|
||||||
return AV_CODEC_ID_H264;
|
return AV_CODEC_ID_H264;
|
||||||
case NvdecCommon::VideoCodec::VP8:
|
case Host1x::NvdecCommon::VideoCodec::VP8:
|
||||||
return AV_CODEC_ID_VP8;
|
return AV_CODEC_ID_VP8;
|
||||||
case NvdecCommon::VideoCodec::VP9:
|
case Host1x::NvdecCommon::VideoCodec::VP9:
|
||||||
return AV_CODEC_ID_VP9;
|
return AV_CODEC_ID_VP9;
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
|
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
|
||||||
|
@ -197,7 +197,7 @@ void Codec::Initialize() {
|
||||||
initialized = true;
|
initialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
|
void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
|
||||||
if (current_codec != codec) {
|
if (current_codec != codec) {
|
||||||
current_codec = codec;
|
current_codec = codec;
|
||||||
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
|
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
|
||||||
|
@ -215,11 +215,11 @@ void Codec::Decode() {
|
||||||
bool vp9_hidden_frame = false;
|
bool vp9_hidden_frame = false;
|
||||||
const auto& frame_data = [&]() {
|
const auto& frame_data = [&]() {
|
||||||
switch (current_codec) {
|
switch (current_codec) {
|
||||||
case Tegra::NvdecCommon::VideoCodec::H264:
|
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
|
||||||
return h264_decoder->ComposeFrame(state, is_first_frame);
|
return h264_decoder->ComposeFrame(state, is_first_frame);
|
||||||
case Tegra::NvdecCommon::VideoCodec::VP8:
|
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
|
||||||
return vp8_decoder->ComposeFrame(state);
|
return vp8_decoder->ComposeFrame(state);
|
||||||
case Tegra::NvdecCommon::VideoCodec::VP9:
|
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
|
||||||
vp9_decoder->ComposeFrame(state);
|
vp9_decoder->ComposeFrame(state);
|
||||||
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
|
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
|
||||||
return vp9_decoder->GetFrameBytes();
|
return vp9_decoder->GetFrameBytes();
|
||||||
|
@ -287,21 +287,21 @@ AVFramePtr Codec::GetCurrentFrame() {
|
||||||
return frame;
|
return frame;
|
||||||
}
|
}
|
||||||
|
|
||||||
NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
|
Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
|
||||||
return current_codec;
|
return current_codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string_view Codec::GetCurrentCodecName() const {
|
std::string_view Codec::GetCurrentCodecName() const {
|
||||||
switch (current_codec) {
|
switch (current_codec) {
|
||||||
case NvdecCommon::VideoCodec::None:
|
case Host1x::NvdecCommon::VideoCodec::None:
|
||||||
return "None";
|
return "None";
|
||||||
case NvdecCommon::VideoCodec::H264:
|
case Host1x::NvdecCommon::VideoCodec::H264:
|
||||||
return "H264";
|
return "H264";
|
||||||
case NvdecCommon::VideoCodec::VP8:
|
case Host1x::NvdecCommon::VideoCodec::VP8:
|
||||||
return "VP8";
|
return "VP8";
|
||||||
case NvdecCommon::VideoCodec::H265:
|
case Host1x::NvdecCommon::VideoCodec::H265:
|
||||||
return "H265";
|
return "H265";
|
||||||
case NvdecCommon::VideoCodec::VP9:
|
case Host1x::NvdecCommon::VideoCodec::VP9:
|
||||||
return "VP9";
|
return "VP9";
|
||||||
default:
|
default:
|
||||||
return "Unknown";
|
return "Unknown";
|
|
@ -6,8 +6,8 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
#include "common/common_types.h"
|
||||||
#include "video_core/command_classes/nvdec_common.h"
|
#include "video_core/host1x/nvdec_common.h"
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#if defined(__GNUC__) || defined(__clang__)
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
@ -34,14 +34,14 @@ class VP9;
|
||||||
|
|
||||||
class Codec {
|
class Codec {
|
||||||
public:
|
public:
|
||||||
explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
|
explicit Codec(GPU& gpu, const Host1x::NvdecCommon::NvdecRegisters& regs);
|
||||||
~Codec();
|
~Codec();
|
||||||
|
|
||||||
/// Initialize the codec, returning success or failure
|
/// Initialize the codec, returning success or failure
|
||||||
void Initialize();
|
void Initialize();
|
||||||
|
|
||||||
/// Sets NVDEC video stream codec
|
/// Sets NVDEC video stream codec
|
||||||
void SetTargetCodec(NvdecCommon::VideoCodec codec);
|
void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
|
||||||
|
|
||||||
/// Call decoders to construct headers, decode AVFrame with ffmpeg
|
/// Call decoders to construct headers, decode AVFrame with ffmpeg
|
||||||
void Decode();
|
void Decode();
|
||||||
|
@ -50,7 +50,7 @@ public:
|
||||||
[[nodiscard]] AVFramePtr GetCurrentFrame();
|
[[nodiscard]] AVFramePtr GetCurrentFrame();
|
||||||
|
|
||||||
/// Returns the value of current_codec
|
/// Returns the value of current_codec
|
||||||
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
|
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
|
||||||
|
|
||||||
/// Return name of the current codec
|
/// Return name of the current codec
|
||||||
[[nodiscard]] std::string_view GetCurrentCodecName() const;
|
[[nodiscard]] std::string_view GetCurrentCodecName() const;
|
||||||
|
@ -63,14 +63,14 @@ private:
|
||||||
bool CreateGpuAvDevice();
|
bool CreateGpuAvDevice();
|
||||||
|
|
||||||
bool initialized{};
|
bool initialized{};
|
||||||
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
|
Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
|
||||||
|
|
||||||
const AVCodec* av_codec{nullptr};
|
const AVCodec* av_codec{nullptr};
|
||||||
AVCodecContext* av_codec_ctx{nullptr};
|
AVCodecContext* av_codec_ctx{nullptr};
|
||||||
AVBufferRef* av_gpu_decoder{nullptr};
|
AVBufferRef* av_gpu_decoder{nullptr};
|
||||||
|
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
const NvdecCommon::NvdecRegisters& state;
|
const Host1x::NvdecCommon::NvdecRegisters& state;
|
||||||
std::unique_ptr<Decoder::H264> h264_decoder;
|
std::unique_ptr<Decoder::H264> h264_decoder;
|
||||||
std::unique_ptr<Decoder::VP8> vp8_decoder;
|
std::unique_ptr<Decoder::VP8> vp8_decoder;
|
||||||
std::unique_ptr<Decoder::VP9> vp9_decoder;
|
std::unique_ptr<Decoder::VP9> vp9_decoder;
|
|
@ -5,8 +5,8 @@
|
||||||
#include <bit>
|
#include <bit>
|
||||||
|
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
#include "video_core/command_classes/codecs/h264.h"
|
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/codecs/h264.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
namespace Tegra::Decoder {
|
namespace Tegra::Decoder {
|
||||||
|
@ -28,7 +28,7 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
|
||||||
|
|
||||||
H264::~H264() = default;
|
H264::~H264() = default;
|
||||||
|
|
||||||
const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state,
|
const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
|
||||||
bool is_first_frame) {
|
bool is_first_frame) {
|
||||||
H264DecoderContext context;
|
H264DecoderContext context;
|
||||||
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
|
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
|
|
@ -8,7 +8,7 @@
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/command_classes/nvdec_common.h"
|
#include "video_core/host1x/nvdec_common.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class GPU;
|
class GPU;
|
||||||
|
@ -59,8 +59,8 @@ public:
|
||||||
~H264();
|
~H264();
|
||||||
|
|
||||||
/// Compose the H264 frame for FFmpeg decoding
|
/// Compose the H264 frame for FFmpeg decoding
|
||||||
[[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state,
|
[[nodiscard]] const std::vector<u8>& ComposeFrame(
|
||||||
bool is_first_frame = false);
|
const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<u8> frame;
|
std::vector<u8> frame;
|
|
@ -3,8 +3,8 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "video_core/command_classes/codecs/vp8.h"
|
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/codecs/vp8.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
namespace Tegra::Decoder {
|
namespace Tegra::Decoder {
|
||||||
|
@ -12,7 +12,7 @@ VP8::VP8(GPU& gpu_) : gpu(gpu_) {}
|
||||||
|
|
||||||
VP8::~VP8() = default;
|
VP8::~VP8() = default;
|
||||||
|
|
||||||
const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
|
const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||||
VP8PictureInfo info;
|
VP8PictureInfo info;
|
||||||
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
|
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
|
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/command_classes/nvdec_common.h"
|
#include "video_core/host1x/nvdec_common.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class GPU;
|
class GPU;
|
||||||
|
@ -20,7 +20,8 @@ public:
|
||||||
~VP8();
|
~VP8();
|
||||||
|
|
||||||
/// Compose the VP8 frame for FFmpeg decoding
|
/// Compose the VP8 frame for FFmpeg decoding
|
||||||
[[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state);
|
[[nodiscard]] const std::vector<u8>& ComposeFrame(
|
||||||
|
const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<u8> frame;
|
std::vector<u8> frame;
|
|
@ -4,8 +4,8 @@
|
||||||
#include <algorithm> // for std::copy
|
#include <algorithm> // for std::copy
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "video_core/command_classes/codecs/vp9.h"
|
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/codecs/vp9.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
namespace Tegra::Decoder {
|
namespace Tegra::Decoder {
|
||||||
|
@ -355,7 +355,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
|
Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||||
PictureInfo picture_info;
|
PictureInfo picture_info;
|
||||||
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
|
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
|
||||||
Vp9PictureInfo vp9_info = picture_info.Convert();
|
Vp9PictureInfo vp9_info = picture_info.Convert();
|
||||||
|
@ -376,7 +376,7 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
|
||||||
entropy.Convert(dst);
|
entropy.Convert(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
|
Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||||
Vp9FrameContainer current_frame{};
|
Vp9FrameContainer current_frame{};
|
||||||
{
|
{
|
||||||
gpu.SyncGuestHost();
|
gpu.SyncGuestHost();
|
||||||
|
@ -769,7 +769,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
|
||||||
return uncomp_writer;
|
return uncomp_writer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
|
void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
|
||||||
std::vector<u8> bitstream;
|
std::vector<u8> bitstream;
|
||||||
{
|
{
|
||||||
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
|
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
|
|
@ -8,8 +8,8 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/stream.h"
|
#include "common/stream.h"
|
||||||
#include "video_core/command_classes/codecs/vp9_types.h"
|
#include "video_core/host1x/codecs/vp9_types.h"
|
||||||
#include "video_core/command_classes/nvdec_common.h"
|
#include "video_core/host1x/nvdec_common.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class GPU;
|
class GPU;
|
||||||
|
@ -117,7 +117,7 @@ public:
|
||||||
|
|
||||||
/// Composes the VP9 frame from the GPU state information.
|
/// Composes the VP9 frame from the GPU state information.
|
||||||
/// Based on the official VP9 spec documentation
|
/// Based on the official VP9 spec documentation
|
||||||
void ComposeFrame(const NvdecCommon::NvdecRegisters& state);
|
void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||||
|
|
||||||
/// Returns true if the most recent frame was a hidden frame.
|
/// Returns true if the most recent frame was a hidden frame.
|
||||||
[[nodiscard]] bool WasFrameHidden() const {
|
[[nodiscard]] bool WasFrameHidden() const {
|
||||||
|
@ -162,13 +162,15 @@ private:
|
||||||
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||||
|
|
||||||
/// Returns VP9 information from NVDEC provided offset and size
|
/// Returns VP9 information from NVDEC provided offset and size
|
||||||
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
|
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
|
||||||
|
const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||||
|
|
||||||
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
|
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
|
||||||
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
|
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
|
||||||
|
|
||||||
/// Returns frame to be decoded after buffering
|
/// Returns frame to be decoded after buffering
|
||||||
[[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
|
[[nodiscard]] Vp9FrameContainer GetCurrentFrame(
|
||||||
|
const Host1x::NvdecCommon::NvdecRegisters& state);
|
||||||
|
|
||||||
/// Use NVDEC provided information to compose the headers for the current frame
|
/// Use NVDEC provided information to compose the headers for the current frame
|
||||||
[[nodiscard]] std::vector<u8> ComposeCompressedHeader();
|
[[nodiscard]] std::vector<u8> ComposeCompressedHeader();
|
35
src/video_core/host1x/control.cpp
Normal file
35
src/video_core/host1x/control.cpp
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
// Copyright 2022 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv3 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/control.h"
|
||||||
|
#include "video_core/host1x/host1x.h"
|
||||||
|
|
||||||
|
namespace Tegra::Host1x {
|
||||||
|
|
||||||
|
Control::Control(GPU& gpu_) : gpu(gpu_) {}
|
||||||
|
|
||||||
|
Control::~Control() = default;
|
||||||
|
|
||||||
|
/// Dispatches a single Host1x control method.
///
/// @param method   Method identifier taken from the command stream.
/// @param argument Raw argument word that accompanies the method.
///
/// LoadSyncptPayload32 latches the argument as the value later waits compare against;
/// WaitSyncpt / WaitSyncpt32 trigger Execute() with the argument. Anything else is
/// reported as unimplemented.
void Control::ProcessMethod(Method method, u32 argument) {
    if (method == Method::LoadSyncptPayload32) {
        syncpoint_value = argument;
        return;
    }

    const bool is_wait = method == Method::WaitSyncpt || method == Method::WaitSyncpt32;
    if (is_wait) {
        Execute(argument);
        return;
    }

    UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method));
}
|
||||||
|
|
||||||
|
/// Blocks on the host side of the syncpoint selected by `data` until it reaches the
/// value previously latched into `syncpoint_value`.
void Control::Execute(u32 data) {
    auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
    syncpoint_manager.WaitHost(data, syncpoint_value);
}
|
||||||
|
|
||||||
|
} // namespace Tegra::Host1x
|
|
@ -1,5 +1,7 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// (https://github.com/skyline-emu/)
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
|
||||||
|
// or any later version Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
@ -7,9 +9,12 @@
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class GPU;
|
class GPU;
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
class Nvdec;
|
class Nvdec;
|
||||||
|
|
||||||
class Host1x {
|
class Control {
|
||||||
public:
|
public:
|
||||||
enum class Method : u32 {
|
enum class Method : u32 {
|
||||||
WaitSyncpt = 0x8,
|
WaitSyncpt = 0x8,
|
||||||
|
@ -17,8 +22,8 @@ public:
|
||||||
WaitSyncpt32 = 0x50,
|
WaitSyncpt32 = 0x50,
|
||||||
};
|
};
|
||||||
|
|
||||||
explicit Host1x(GPU& gpu);
|
explicit Control(GPU& gpu);
|
||||||
~Host1x();
|
~Control();
|
||||||
|
|
||||||
/// Writes the method into the state, Invoke Execute() if encountered
|
/// Writes the method into the state, Invoke Execute() if encountered
|
||||||
void ProcessMethod(Method method, u32 argument);
|
void ProcessMethod(Method method, u32 argument);
|
||||||
|
@ -31,4 +36,6 @@ private:
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
33
src/video_core/host1x/host1x.h
Normal file
33
src/video_core/host1x/host1x.h
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
// Copyright 2022 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv3 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
|
class Host1x {
|
||||||
|
public:
|
||||||
|
Host1x() : syncpoint_manager{} {}
|
||||||
|
|
||||||
|
SyncpointManager& GetSyncpointManager() {
|
||||||
|
return syncpoint_manager;
|
||||||
|
}
|
||||||
|
|
||||||
|
const SyncpointManager& GetSyncpointManager() const {
|
||||||
|
return syncpoint_manager;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
SyncpointManager syncpoint_manager;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
|
} // namespace Tegra
|
|
@ -2,10 +2,10 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "video_core/command_classes/nvdec.h"
|
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/nvdec.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra::Host1x {
|
||||||
|
|
||||||
#define NVDEC_REG_INDEX(field_name) \
|
#define NVDEC_REG_INDEX(field_name) \
|
||||||
(offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
|
(offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
|
||||||
|
@ -44,4 +44,4 @@ void Nvdec::Execute() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra::Host1x
|
|
@ -6,11 +6,13 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/command_classes/codecs/codec.h"
|
#include "video_core/host1x/codecs/codec.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class GPU;
|
class GPU;
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
class Nvdec {
|
class Nvdec {
|
||||||
public:
|
public:
|
||||||
explicit Nvdec(GPU& gpu);
|
explicit Nvdec(GPU& gpu);
|
||||||
|
@ -30,4 +32,7 @@ private:
|
||||||
NvdecCommon::NvdecRegisters state;
|
NvdecCommon::NvdecRegisters state;
|
||||||
std::unique_ptr<Codec> codec;
|
std::unique_ptr<Codec> codec;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
|
@ -7,7 +7,7 @@
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
namespace Tegra::NvdecCommon {
|
namespace Tegra::Host1x::NvdecCommon {
|
||||||
|
|
||||||
enum class VideoCodec : u64 {
|
enum class VideoCodec : u64 {
|
||||||
None = 0x0,
|
None = 0x0,
|
||||||
|
@ -94,4 +94,4 @@ ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
|
||||||
|
|
||||||
#undef ASSERT_REG_POSITION
|
#undef ASSERT_REG_POSITION
|
||||||
|
|
||||||
} // namespace Tegra::NvdecCommon
|
} // namespace Tegra::Host1x::NvdecCommon
|
|
@ -4,8 +4,12 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "sync_manager.h"
|
#include "sync_manager.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/host1x.h"
|
||||||
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
|
SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
|
||||||
SyncptIncrManager::~SyncptIncrManager() = default;
|
SyncptIncrManager::~SyncptIncrManager() = default;
|
||||||
|
|
||||||
|
@ -36,8 +40,12 @@ void SyncptIncrManager::IncrementAllDone() {
|
||||||
if (!increments[done_count].complete) {
|
if (!increments[done_count].complete) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
|
auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
|
||||||
|
syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id);
|
||||||
|
syncpoint_manager.IncrementHost(increments[done_count].syncpt_id);
|
||||||
}
|
}
|
||||||
increments.erase(increments.begin(), increments.begin() + done_count);
|
increments.erase(increments.begin(), increments.begin() + done_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
|
@ -8,7 +8,11 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
class GPU;
|
class GPU;
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
struct SyncptIncr {
|
struct SyncptIncr {
|
||||||
u32 id;
|
u32 id;
|
||||||
u32 class_id;
|
u32 class_id;
|
||||||
|
@ -44,4 +48,6 @@ private:
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
93
src/video_core/host1x/syncpoint_manager.cpp
Normal file
93
src/video_core/host1x/syncpoint_manager.cpp
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv3 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
|
/// Registers `action` to run once `syncpoint` reaches `expected_value`.
///
/// @param syncpoint      Counter the action is tied to.
/// @param action_storage Pending-action list belonging to that counter.
/// @param expected_value Threshold the counter must reach.
/// @param action         Callback to run; it is copied into the list when queued.
/// @return Handle to the queued entry, or a value-initialized handle when the
///         threshold was already reached and the action ran immediately.
///
/// When the action runs immediately it is always invoked WITHOUT `guard` held, so a
/// callback may safely call back into the manager.
SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
    std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value,
    std::function<void(void)>& action) {
    // Fast path: threshold already reached, no need to touch the lock at all.
    if (syncpoint.load(std::memory_order_acquire) >= expected_value) {
        action();
        return {};
    }

    std::unique_lock<std::mutex> lk(guard);
    // Re-check under the lock: the counter may have advanced between the first check
    // and the lock acquisition.
    if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
        // Drop the lock first. `guard` is a non-recursive mutex, so invoking the
        // callback while holding it would deadlock if the callback re-enters the
        // manager; the fast path above already runs callbacks unlocked.
        lk.unlock();
        action();
        return {};
    }

    // Keep the list ordered by ascending expected value: insert in front of the first
    // entry whose threshold is not smaller than ours.
    auto it = action_storage.begin();
    while (it != action_storage.end() && it->expected_value < expected_value) {
        ++it;
    }
    return action_storage.emplace(it, expected_value, action);
}
|
||||||
|
|
||||||
|
void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
|
||||||
|
ActionHandle& handle) {
|
||||||
|
std::unique_lock<std::mutex> lk(guard);
|
||||||
|
action_storage.erase(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cancels a pending guest-side action of syncpoint `syncpoint_id`.
void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) {
    auto& pending_guest_actions = guest_action_storage[syncpoint_id];
    DeregisterAction(pending_guest_actions, handle);
}
|
||||||
|
|
||||||
|
/// Cancels a pending host-side action of syncpoint `syncpoint_id`.
void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) {
    auto& pending_host_actions = host_action_storage[syncpoint_id];
    DeregisterAction(pending_host_actions, handle);
}
|
||||||
|
|
||||||
|
/// Advances the guest-side counter of `syncpoint_id` by one, running due guest actions
/// and waking guest waiters.
void SyncpointManager::IncrementGuest(u32 syncpoint_id) {
    auto& counter = syncpoints_guest[syncpoint_id];
    auto& pending_actions = guest_action_storage[syncpoint_id];
    Increment(counter, wait_guest_cv, pending_actions);
}
|
||||||
|
|
||||||
|
/// Advances the host-side counter of `syncpoint_id` by one, running due host actions
/// and waking host waiters.
void SyncpointManager::IncrementHost(u32 syncpoint_id) {
    auto& counter = syncpoints_host[syncpoint_id];
    auto& pending_actions = host_action_storage[syncpoint_id];
    Increment(counter, wait_host_cv, pending_actions);
}
|
||||||
|
|
||||||
|
/// Blocks until the guest-side counter of `syncpoint_id` is at least `expected_value`.
void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) {
    auto& counter = syncpoints_guest[syncpoint_id];
    Wait(counter, wait_guest_cv, expected_value);
}
|
||||||
|
|
||||||
|
/// Blocks until the host-side counter of `syncpoint_id` is at least `expected_value`.
void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) {
    auto& counter = syncpoints_host[syncpoint_id];
    Wait(counter, wait_host_cv, expected_value);
}
|
||||||
|
|
||||||
|
void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
|
||||||
|
std::list<RegisteredAction>& action_storage) {
|
||||||
|
auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
|
||||||
|
|
||||||
|
std::unique_lock<std::mutex> lk(guard);
|
||||||
|
auto it = action_storage.begin();
|
||||||
|
while (it != action_storage.end()) {
|
||||||
|
if (it->expected_value > new_value) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
it->action();
|
||||||
|
it = action_storage.erase(it);
|
||||||
|
}
|
||||||
|
wait_cv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
|
||||||
|
u32 expected_value) {
|
||||||
|
const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; };
|
||||||
|
if (pred()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_lock<std::mutex> lk(guard);
|
||||||
|
wait_cv.wait(lk, pred);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
|
} // namespace Tegra
|
99
src/video_core/host1x/syncpoint_manager.h
Normal file
99
src/video_core/host1x/syncpoint_manager.h
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv3 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
#include <atomic>
#include <condition_variable>
#include <functional>
#include <list>
#include <mutex>
#include <utility>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
|
class SyncpointManager {
|
||||||
|
public:
|
||||||
|
u32 GetGuestSyncpointValue(u32 id) {
|
||||||
|
return syncpoints_guest[id].load(std::memory_order_acquire);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 GetHostSyncpointValue(u32 id) {
|
||||||
|
return syncpoints_host[id].load(std::memory_order_acquire);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RegisteredAction {
|
||||||
|
RegisteredAction(u32 expected_value_, std::function<void(void)>& action_)
|
||||||
|
: expected_value{expected_value_}, action{action_} {}
|
||||||
|
u32 expected_value;
|
||||||
|
std::function<void(void)> action;
|
||||||
|
};
|
||||||
|
using ActionHandle = std::list<RegisteredAction>::iterator;
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
|
||||||
|
std::function<void(void)> func(action);
|
||||||
|
return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id],
|
||||||
|
expected_value, func);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
|
||||||
|
std::function<void(void)> func(action);
|
||||||
|
return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id],
|
||||||
|
expected_value, func);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle);
|
||||||
|
|
||||||
|
void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle);
|
||||||
|
|
||||||
|
void IncrementGuest(u32 syncpoint_id);
|
||||||
|
|
||||||
|
void IncrementHost(u32 syncpoint_id);
|
||||||
|
|
||||||
|
void WaitGuest(u32 syncpoint_id, u32 expected_value);
|
||||||
|
|
||||||
|
void WaitHost(u32 syncpoint_id, u32 expected_value);
|
||||||
|
|
||||||
|
bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) {
|
||||||
|
return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsReadyHost(u32 syncpoint_id, u32 expected_value) {
|
||||||
|
return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
|
||||||
|
std::list<RegisteredAction>& action_storage);
|
||||||
|
|
||||||
|
ActionHandle RegisterAction(std::atomic<u32>& syncpoint,
|
||||||
|
std::list<RegisteredAction>& action_storage, u32 expected_value,
|
||||||
|
std::function<void(void)>& action);
|
||||||
|
|
||||||
|
void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle);
|
||||||
|
|
||||||
|
void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value);
|
||||||
|
|
||||||
|
static constexpr size_t NUM_MAX_SYNCPOINTS = 192;
|
||||||
|
|
||||||
|
std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{};
|
||||||
|
std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{};
|
||||||
|
|
||||||
|
std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage;
|
||||||
|
std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage;
|
||||||
|
|
||||||
|
std::mutex guard;
|
||||||
|
std::condition_variable wait_guest_cv;
|
||||||
|
std::condition_variable wait_host_cv;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
|
} // namespace Tegra
|
|
@ -18,14 +18,17 @@ extern "C" {
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
|
||||||
#include "video_core/command_classes/nvdec.h"
|
|
||||||
#include "video_core/command_classes/vic.h"
|
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/host1x/nvdec.h"
|
||||||
|
#include "video_core/host1x/vic.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
enum class VideoPixelFormat : u64_le {
|
enum class VideoPixelFormat : u64_le {
|
||||||
RGBA8 = 0x1f,
|
RGBA8 = 0x1f,
|
||||||
|
@ -235,4 +238,6 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
|
||||||
chroma_buffer.size());
|
chroma_buffer.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
|
@ -11,6 +11,9 @@ struct SwsContext;
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class GPU;
|
class GPU;
|
||||||
|
|
||||||
|
namespace Host1x {
|
||||||
|
|
||||||
class Nvdec;
|
class Nvdec;
|
||||||
union VicConfig;
|
union VicConfig;
|
||||||
|
|
||||||
|
@ -40,7 +43,7 @@ private:
|
||||||
void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
|
void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
|
||||||
|
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
|
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
|
||||||
|
|
||||||
/// Avoid reallocation of the following buffers every frame, as their
|
/// Avoid reallocation of the following buffers every frame, as their
|
||||||
/// size does not change during a stream
|
/// size does not change during a stream
|
||||||
|
@ -58,4 +61,6 @@ private:
|
||||||
s32 scaler_height{};
|
s32 scaler_height{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace Host1x
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
Loading…
Reference in a new issue