3
0
Fork 0
forked from suyu/suyu

Merge pull request #1264 from degasus/optimizations

video_core: Optimize the command processor.
This commit is contained in:
bunnei 2018-09-10 18:02:47 -04:00 committed by GitHub
commit ae0c95efcc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 123 additions and 126 deletions

View file

@ -8,6 +8,7 @@
#include "core/core.h" #include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/command_processor.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
params.address, params.num_entries, params.flags); params.address, params.num_entries, params.flags);
ASSERT_MSG(input.size() == ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry), params.num_entries * sizeof(Tegra::CommandListHeader),
"Incorrect input size"); "Incorrect input size");
std::vector<IoctlGpfifoEntry> entries(params.num_entries); std::vector<Tegra::CommandListHeader> entries(params.num_entries);
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
params.num_entries * sizeof(IoctlGpfifoEntry)); params.num_entries * sizeof(Tegra::CommandListHeader));
for (auto entry : entries) {
Tegra::GPUVAddr va_addr = entry.Address(); Core::System::GetInstance().GPU().ProcessCommandLists(entries);
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
}
params.fence_out.id = 0; params.fence_out.id = 0;
params.fence_out.value = 0; params.fence_out.value = 0;
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
params.address, params.num_entries, params.flags); params.address, params.num_entries, params.flags);
std::vector<IoctlGpfifoEntry> entries(params.num_entries); std::vector<Tegra::CommandListHeader> entries(params.num_entries);
Memory::ReadBlock(params.address, entries.data(), Memory::ReadBlock(params.address, entries.data(),
params.num_entries * sizeof(IoctlGpfifoEntry)); params.num_entries * sizeof(Tegra::CommandListHeader));
Core::System::GetInstance().GPU().ProcessCommandLists(entries);
for (auto entry : entries) {
Tegra::GPUVAddr va_addr = entry.Address();
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
}
params.fence_out.id = 0; params.fence_out.id = 0;
params.fence_out.value = 0; params.fence_out.value = 0;
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());

View file

@ -10,7 +10,6 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/swap.h" #include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "video_core/memory_manager.h"
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
@ -151,22 +150,6 @@ private:
}; };
static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size"); static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
/// One GPFIFO entry: two 32-bit words holding a GPU virtual address and a size field.
struct IoctlGpfifoEntry {
    u32_le entry0; // gpu_va_lo
    union {
        // Raw second word. The BitField members below alias it with this packing:
        // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
        u32_le entry1;
        BitField<0, 8, u32_le> gpu_va_hi;
        BitField<8, 2, u32_le> unk1;
        BitField<10, 21, u32_le> sz;
        BitField<31, 1, u32_le> unk2;
    };

    /// Returns the GPU virtual address: gpu_va_hi placed above bit 32, OR'ed with entry0.
    Tegra::GPUVAddr Address() const {
        const auto upper = static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32;
        return upper | entry0;
    }
};
// The struct must stay exactly two 32-bit words wide.
static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
struct IoctlSubmitGpfifo { struct IoctlSubmitGpfifo {
u64_le address; // pointer to gpfifo entry structs u64_le address; // pointer to gpfifo entry structs
u32_le num_entries; // number of fence objects being submitted u32_le num_entries; // number of fence objects being submitted

View file

@ -28,7 +28,12 @@ enum class BufferMethods {
CountBufferMethods = 0x40, CountBufferMethods = 0x40,
}; };
void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) { MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
MICROPROFILE_SCOPE(ProcessCommandLists);
auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
LOG_TRACE(HW_GPU, LOG_TRACE(HW_GPU,
"Processing method {:08X} on subchannel {} value " "Processing method {:08X} on subchannel {} value "
"{:08X} remaining params {}", "{:08X} remaining params {}",
@ -67,9 +72,11 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
default: default:
UNIMPLEMENTED_MSG("Unimplemented engine"); UNIMPLEMENTED_MSG("Unimplemented engine");
} }
} };
void GPU::ProcessCommandList(GPUVAddr address, u32 size) { for (auto entry : commands) {
Tegra::GPUVAddr address = entry.Address();
u32 size = entry.sz;
const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
VAddr current_addr = *head_address; VAddr current_addr = *head_address;
while (current_addr < *head_address + size * sizeof(CommandHeader)) { while (current_addr < *head_address + size * sizeof(CommandHeader)) {
@ -100,8 +107,8 @@ void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
case SubmissionMode::IncreaseOnce: { case SubmissionMode::IncreaseOnce: {
ASSERT(header.arg_count.Value() >= 1); ASSERT(header.arg_count.Value() >= 1);
// Use the original method for the first argument and then the next method for all other // Use the original method for the first argument and then the next method for all
// arguments. // other arguments.
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
header.arg_count - 1); header.arg_count - 1);
current_addr += sizeof(u32); current_addr += sizeof(u32);
@ -122,6 +129,7 @@ void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
} }
}
} }
} // namespace Tegra } // namespace Tegra

View file

@ -7,6 +7,7 @@
#include <type_traits> #include <type_traits>
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/memory_manager.h"
namespace Tegra { namespace Tegra {
@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
IncreaseOnce = 5 IncreaseOnce = 5
}; };
/// Header describing one command list: a GPU virtual address plus a size field, packed into
/// two 32-bit words.
struct CommandListHeader {
    u32 entry0; // gpu_va_lo
    union {
        // Raw second word. The BitField members below alias it with this packing:
        // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
        u32 entry1;
        BitField<0, 8, u32> gpu_va_hi;
        BitField<8, 2, u32> unk1;
        BitField<10, 21, u32> sz;
        BitField<31, 1, u32> unk2;
    };

    /// Returns the GPU virtual address: gpu_va_hi placed above bit 32, OR'ed with entry0.
    GPUVAddr Address() const {
        const auto upper = static_cast<GPUVAddr>(gpu_va_hi) << 32;
        return upper | entry0;
    }
};
// The struct must stay exactly two 32-bit words wide.
static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
union CommandHeader { union CommandHeader {
u32 hex; u32 hex;

View file

@ -135,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
break; break;
} }
rasterizer.NotifyMaxwellRegisterChanged(method);
if (debug_context) { if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr); debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
} }

View file

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <memory> #include <memory>
#include <vector>
#include "common/common_types.h" #include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h" #include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
/// Returns the number of bytes per pixel of each depth format. /// Returns the number of bytes per pixel of each depth format.
u32 DepthFormatBytesPerPixel(DepthFormat format); u32 DepthFormatBytesPerPixel(DepthFormat format);
struct CommandListHeader;
class DebugContext; class DebugContext;
/** /**
@ -115,7 +117,7 @@ public:
~GPU(); ~GPU();
/// Processes a command list stored at the specified address in GPU memory. /// Processes a command list stored at the specified address in GPU memory.
void ProcessCommandList(GPUVAddr address, u32 size); void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
/// Returns a reference to the Maxwell3D GPU engine. /// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D(); Engines::Maxwell3D& Maxwell3D();
@ -130,9 +132,6 @@ public:
const Tegra::MemoryManager& MemoryManager() const; const Tegra::MemoryManager& MemoryManager() const;
private: private:
/// Writes a single register in the engine bound to the specified subchannel
void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
std::unique_ptr<Tegra::MemoryManager> memory_manager; std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids. /// Mapping of command subchannels to their bound engine ids.

View file

@ -20,9 +20,6 @@ public:
/// Clear the current framebuffer /// Clear the current framebuffer
virtual void Clear() = 0; virtual void Clear() = 0;
/// Notify rasterizer that the specified Maxwell register has been changed
virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
/// Notify rasterizer that all caches should be flushed to Switch memory /// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0; virtual void FlushAll() = 0;

View file

@ -527,8 +527,6 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply(); state.Apply();
} }
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {} void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}

View file

@ -45,7 +45,6 @@ public:
void DrawArrays() override; void DrawArrays() override;
void Clear() override; void Clear() override;
void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override; void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override; void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;