forked from suyu/suyu
dma_pushbuffer: Optimize to avoid loop and copy on Push.
This commit is contained in:
parent
c568f5cea7
commit
ac74b71d75
3 changed files with 23 additions and 13 deletions
|
@@ -128,11 +128,9 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PushGPUEntries(const std::vector<Tegra::CommandListHeader>& entries) {
|
static void PushGPUEntries(Tegra::CommandList&& entries) {
|
||||||
auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
|
auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
|
||||||
for (const auto& entry : entries) {
|
dma_pusher.Push(std::move(entries));
|
||||||
dma_pusher.Push(entry);
|
|
||||||
}
|
|
||||||
dma_pusher.DispatchCalls();
|
dma_pusher.DispatchCalls();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -149,11 +147,11 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader),
|
params.num_entries * sizeof(Tegra::CommandListHeader),
|
||||||
"Incorrect input size");
|
"Incorrect input size");
|
||||||
|
|
||||||
std::vector<Tegra::CommandListHeader> entries(params.num_entries);
|
Tegra::CommandList entries(params.num_entries);
|
||||||
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
|
|
||||||
PushGPUEntries(entries);
|
PushGPUEntries(std::move(entries));
|
||||||
|
|
||||||
params.fence_out.id = 0;
|
params.fence_out.id = 0;
|
||||||
params.fence_out.value = 0;
|
params.fence_out.value = 0;
|
||||||
|
@@ -170,11 +168,11 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
|
||||||
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
||||||
params.address, params.num_entries, params.flags);
|
params.address, params.num_entries, params.flags);
|
||||||
|
|
||||||
std::vector<Tegra::CommandListHeader> entries(params.num_entries);
|
Tegra::CommandList entries(params.num_entries);
|
||||||
Memory::ReadBlock(params.address, entries.data(),
|
Memory::ReadBlock(params.address, entries.data(),
|
||||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
|
|
||||||
PushGPUEntries(entries);
|
PushGPUEntries(std::move(entries));
|
||||||
|
|
||||||
params.fence_out.id = 0;
|
params.fence_out.id = 0;
|
||||||
params.fence_out.value = 0;
|
params.fence_out.value = 0;
|
||||||
|
|
|
@@ -23,6 +23,8 @@ void DmaPusher::DispatchCalls() {
|
||||||
// On entering GPU code, assume all memory may be touched by the ARM core.
|
// On entering GPU code, assume all memory may be touched by the ARM core.
|
||||||
gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
|
gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
|
||||||
|
dma_pushbuffer_subindex = 0;
|
||||||
|
|
||||||
while (Core::System::GetInstance().IsPoweredOn()) {
|
while (Core::System::GetInstance().IsPoweredOn()) {
|
||||||
if (!Step()) {
|
if (!Step()) {
|
||||||
break;
|
break;
|
||||||
|
@@ -89,11 +91,17 @@ bool DmaPusher::Step() {
|
||||||
}
|
}
|
||||||
} else if (ib_enable && !dma_pushbuffer.empty()) {
|
} else if (ib_enable && !dma_pushbuffer.empty()) {
|
||||||
// Current pushbuffer empty, but we have more IB entries to read
|
// Current pushbuffer empty, but we have more IB entries to read
|
||||||
const CommandListHeader& command_list_header{dma_pushbuffer.front()};
|
const CommandList& command_list{dma_pushbuffer.front()};
|
||||||
|
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
|
||||||
dma_get = command_list_header.addr;
|
dma_get = command_list_header.addr;
|
||||||
dma_put = dma_get + command_list_header.size * sizeof(u32);
|
dma_put = dma_get + command_list_header.size * sizeof(u32);
|
||||||
non_main = command_list_header.is_non_main;
|
non_main = command_list_header.is_non_main;
|
||||||
|
|
||||||
|
if (dma_pushbuffer_subindex >= command_list.size()) {
|
||||||
|
// We've gone through the current list, remove it from the queue
|
||||||
dma_pushbuffer.pop();
|
dma_pushbuffer.pop();
|
||||||
|
dma_pushbuffer_subindex = 0;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
|
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
|
||||||
return {};
|
return {};
|
||||||
|
|
|
@@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
|
@@ -45,6 +46,8 @@ static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect
|
||||||
|
|
||||||
class GPU;
|
class GPU;
|
||||||
|
|
||||||
|
using CommandList = std::vector<Tegra::CommandListHeader>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
|
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
|
||||||
* emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
|
* emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
|
||||||
|
@@ -57,8 +60,8 @@ public:
|
||||||
explicit DmaPusher(GPU& gpu);
|
explicit DmaPusher(GPU& gpu);
|
||||||
~DmaPusher();
|
~DmaPusher();
|
||||||
|
|
||||||
void Push(const CommandListHeader& command_list_header) {
|
void Push(CommandList&& entries) {
|
||||||
dma_pushbuffer.push(command_list_header);
|
dma_pushbuffer.push(std::move(entries));
|
||||||
}
|
}
|
||||||
|
|
||||||
void DispatchCalls();
|
void DispatchCalls();
|
||||||
|
@@ -72,7 +75,8 @@ private:
|
||||||
|
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
|
|
||||||
std::queue<CommandListHeader> dma_pushbuffer;
|
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
|
||||||
|
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
|
||||||
|
|
||||||
struct DmaState {
|
struct DmaState {
|
||||||
u32 method; ///< Current method
|
u32 method; ///< Current method
|
||||||
|
|
Loading…
Reference in a new issue