forked from suyu/suyu
OpenGL: Implement Fencing backend.
This commit is contained in:
parent
ed7e965712
commit
487379c593
12 changed files with 94 additions and 19 deletions
|
@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ReleaseFences() {
|
|
||||||
for (const auto pair : delay_fences) {
|
|
||||||
const auto [addr, payload] = pair;
|
|
||||||
memory_manager.Write<u32>(addr, static_cast<u32>(payload));
|
|
||||||
}
|
|
||||||
delay_fences.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Maxwell3D::ProcessQueryGet() {
|
void Maxwell3D::ProcessQueryGet() {
|
||||||
// TODO(Subv): Support the other query units.
|
// TODO(Subv): Support the other query units.
|
||||||
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
|
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
|
||||||
|
@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() {
|
||||||
|
|
||||||
switch (regs.query.query_get.operation) {
|
switch (regs.query.query_get.operation) {
|
||||||
case Regs::QueryOperation::Release: {
|
case Regs::QueryOperation::Release: {
|
||||||
rasterizer.FlushCommands();
|
|
||||||
rasterizer.SyncGuestHost();
|
|
||||||
const u64 result = regs.query.query_sequence;
|
const u64 result = regs.query.query_sequence;
|
||||||
delay_fences.emplace_back(regs.query.QueryAddress(), result);
|
if (regs.query.query_get.fence == 1) {
|
||||||
|
rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result));
|
||||||
|
} else {
|
||||||
|
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Regs::QueryOperation::Acquire:
|
case Regs::QueryOperation::Acquire:
|
||||||
|
|
|
@ -1427,8 +1427,6 @@ public:
|
||||||
Tables tables{};
|
Tables tables{};
|
||||||
} dirty;
|
} dirty;
|
||||||
|
|
||||||
void ReleaseFences();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void InitializeRegisterDefaults();
|
void InitializeRegisterDefaults();
|
||||||
|
|
||||||
|
@ -1469,8 +1467,6 @@ private:
|
||||||
|
|
||||||
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
|
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
|
||||||
|
|
||||||
std::vector<std::pair<GPUVAddr, u64>> delay_fences;
|
|
||||||
|
|
||||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||||
|
|
||||||
|
|
|
@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU::OnCommandListEnd() {
|
void GPU::OnCommandListEnd() {
|
||||||
maxwell_3d->ReleaseFences();
|
renderer.Rasterizer().ReleaseFences();
|
||||||
}
|
}
|
||||||
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
||||||
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
|
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
|
||||||
|
|
|
@ -157,7 +157,7 @@ public:
|
||||||
|
|
||||||
void FlushCommands();
|
void FlushCommands();
|
||||||
void SyncGuestHost();
|
void SyncGuestHost();
|
||||||
void OnCommandListEnd();
|
virtual void OnCommandListEnd();
|
||||||
|
|
||||||
/// Returns a reference to the Maxwell3D GPU engine.
|
/// Returns a reference to the Maxwell3D GPU engine.
|
||||||
Engines::Maxwell3D& Maxwell3D();
|
Engines::Maxwell3D& Maxwell3D();
|
||||||
|
|
|
@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
|
||||||
gpu_thread.WaitIdle();
|
gpu_thread.WaitIdle();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards the end-of-command-list notification to the GPU thread manager.
void GPUAsynch::OnCommandListEnd() {
    gpu_thread.OnCommandListEnd();
}
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -32,6 +32,8 @@ public:
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||||
void WaitIdle() const override;
|
void WaitIdle() const override;
|
||||||
|
|
||||||
|
void OnCommandListEnd() override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
|
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
|
||||||
dma_pusher.DispatchCalls();
|
dma_pusher.DispatchCalls();
|
||||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
||||||
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
||||||
|
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
|
||||||
|
renderer.Rasterizer().ReleaseFences();
|
||||||
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||||
|
@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Enqueues an OnCommandListEndCommand for the GPU thread to process.
void ThreadManager::OnCommandListEnd() {
    PushCommand(OnCommandListEndCommand{});
}
|
||||||
|
|
||||||
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
||||||
const u64 fence{++state.last_fence};
|
const u64 fence{++state.last_fence};
|
||||||
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
||||||
|
|
|
@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final {
|
||||||
u64 size;
|
u64 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Command to signal to the GPU thread that a command list has ended; the GPU thread handles it
/// by asking the rasterizer to release its pending fences.
|
||||||
|
struct OnCommandListEndCommand final {};
|
||||||
|
|
||||||
using CommandData =
|
using CommandData =
|
||||||
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
|
||||||
|
|
||||||
struct CommandDataContainer {
|
struct CommandDataContainer {
|
||||||
CommandDataContainer() = default;
|
CommandDataContainer() = default;
|
||||||
|
@ -122,6 +125,8 @@ public:
|
||||||
// Wait until the gpu thread is idle.
|
// Wait until the gpu thread is idle.
|
||||||
void WaitIdle() const;
|
void WaitIdle() const;
|
||||||
|
|
||||||
|
void OnCommandListEnd();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Pushes a command to be executed by the GPU thread
|
/// Pushes a command to be executed by the GPU thread
|
||||||
u64 PushCommand(CommandData&& command_data);
|
u64 PushCommand(CommandData&& command_data);
|
||||||
|
|
|
@ -49,6 +49,14 @@ public:
|
||||||
/// Records a GPU query and caches it
|
/// Records a GPU query and caches it
|
||||||
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
|
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
|
||||||
|
|
||||||
|
/// Signals a fence with the given address and value. The default implementation does nothing;
/// rasterizers that implement fencing override it.
virtual void SignalFence(GPUVAddr addr, u32 value) {}
|
||||||
|
|
||||||
|
/// Releases any pending fences. The default implementation does nothing; rasterizers that
/// implement fencing override it.
virtual void ReleaseFences() {}
|
||||||
|
|
||||||
/// Notify rasterizer that all caches should be flushed to Switch memory
|
/// Notify rasterizer that all caches should be flushed to Switch memory
|
||||||
virtual void FlushAll() = 0;
|
virtual void FlushAll() = 0;
|
||||||
|
|
||||||
|
|
|
@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() {
|
||||||
buffer_cache.SyncGuestHost();
|
buffer_cache.SyncGuestHost();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Queues a fence that will write `value` to `addr` once it is released.
///
/// If a fence is already pending, the oldest one is resolved first: its committed async surface
/// flushes are popped, its payload is written to guest GPU memory and it is removed from the
/// queue. The new fence is then queued, the currently gathered async flushes are committed for
/// it, and host commands are flushed and guest/host state synchronized.
void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) {
    if (!fences.empty()) {
        // Copy the entry out before it is popped below.
        const std::pair<GPUVAddr, u32> oldest = fences.front();
        texture_cache.PopAsyncFlushes();
        system.GPU().MemoryManager().Write<u32>(oldest.first, oldest.second);
        fences.pop_front();
    }

    fences.emplace_back(addr, value);
    texture_cache.CommitAsyncFlushes();
    FlushCommands();
    SyncGuestHost();
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::ReleaseFences() {
|
||||||
|
while (!fences.empty()) {
|
||||||
|
const std::pair<GPUVAddr, u32>& current_fence = fences.front();
|
||||||
|
const auto [address, payload] = current_fence;
|
||||||
|
texture_cache.PopAsyncFlushes();
|
||||||
|
auto& gpu{system.GPU()};
|
||||||
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
|
memory_manager.Write<u32>(address, payload);
|
||||||
|
fences.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
if (Settings::IsGPULevelExtreme()) {
|
if (Settings::IsGPULevelExtreme()) {
|
||||||
FlushRegion(addr, size);
|
FlushRegion(addr, size);
|
||||||
|
|
|
@ -69,6 +69,8 @@ public:
|
||||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void SyncGuestHost() override;
|
void SyncGuestHost() override;
|
||||||
|
void SignalFence(GPUVAddr addr, u32 value) override;
|
||||||
|
void ReleaseFences() override;
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||||
void FlushCommands() override;
|
void FlushCommands() override;
|
||||||
void TickFrame() override;
|
void TickFrame() override;
|
||||||
|
|
|
@ -238,7 +238,7 @@ public:
|
||||||
surface->MarkAsRenderTarget(false, NO_RT);
|
surface->MarkAsRenderTarget(false, NO_RT);
|
||||||
const auto& cr_params = surface->GetSurfaceParams();
|
const auto& cr_params = surface->GetSurfaceParams();
|
||||||
if (!cr_params.is_tiled) {
|
if (!cr_params.is_tiled) {
|
||||||
FlushSurface(surface);
|
AsyncFlushSurface(surface);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
render_targets[index].target = surface_view.first;
|
render_targets[index].target = surface_view.first;
|
||||||
|
@ -317,6 +317,26 @@ public:
|
||||||
return ++ticks;
|
return ++ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Appends the current batch of uncommitted flushes (which may be null when nothing was queued)
/// to the committed list and starts a fresh, empty uncommitted batch.
void CommitAsyncFlushes() {
    commited_flushes.emplace_back(uncommited_flushes);
    uncommited_flushes = nullptr;
}
|
||||||
|
|
||||||
|
void PopAsyncFlushes() {
|
||||||
|
if (commited_flushes.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto& flush_list = commited_flushes.front();
|
||||||
|
if (!flush_list) {
|
||||||
|
commited_flushes.pop_front();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (TSurface& surface : *flush_list) {
|
||||||
|
FlushSurface(surface);
|
||||||
|
}
|
||||||
|
commited_flushes.pop_front();
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
bool is_astc_supported)
|
bool is_astc_supported)
|
||||||
|
@ -1152,6 +1172,13 @@ private:
|
||||||
TView view;
|
TView view;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Records `surface` in the uncommitted flush batch instead of flushing it immediately,
/// creating the batch on first use.
void AsyncFlushSurface(TSurface& surface) {
    if (uncommited_flushes == nullptr) {
        uncommited_flushes = std::make_shared<std::list<TSurface>>();
    }
    uncommited_flushes->emplace_back(surface);
}
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
||||||
FormatLookupTable format_lookup_table;
|
FormatLookupTable format_lookup_table;
|
||||||
|
@ -1198,6 +1225,9 @@ private:
|
||||||
|
|
||||||
std::list<TSurface> marked_for_unregister;
|
std::list<TSurface> marked_for_unregister;
|
||||||
|
|
||||||
|
std::shared_ptr<std::list<TSurface>> uncommited_flushes{};
|
||||||
|
std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes;
|
||||||
|
|
||||||
StagingCache staging_cache;
|
StagingCache staging_cache;
|
||||||
std::recursive_mutex mutex;
|
std::recursive_mutex mutex;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue