DMAPusher: Improve collection of non executing methods

This commit is contained in:
Fernando Sahmkow 2022-11-27 00:58:06 +01:00
parent ce448ce770
commit cb1497d0d7
13 changed files with 181 additions and 2 deletions

View file

@ -178,6 +178,11 @@ void DmaPusher::CallMethod(u32 argument) const {
});
} else {
auto subchannel = subchannels[dma_state.subchannel];
if (!subchannel->execution_mask[dma_state.method]) [[likely]] {
subchannel->method_sink.emplace_back(dma_state.method, argument);
return;
}
subchannel->ConsumeSink();
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
}
@ -189,6 +194,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
dma_state.method_count);
} else {
auto subchannel = subchannels[dma_state.subchannel];
subchannel->ConsumeSink();
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
dma_state.method_count);

View file

@ -3,6 +3,10 @@
#pragma once
#include <bitset>
#include <limits>
#include <vector>
#include "common/common_types.h"
namespace Tegra::Engines {
@ -18,8 +22,25 @@ public:
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) = 0;
void ConsumeSink() {
if (method_sink.empty()) {
return;
}
ConsumeSinkImpl();
}
std::bitset<std::numeric_limits<u16>::max()> execution_mask{};
std::vector<std::pair<u32, u32>> method_sink{};
bool current_dirty{};
GPUVAddr current_dma_segment;
protected:
virtual void ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
CallMethod(method, value, true);
}
method_sink.clear();
}
};
} // namespace Tegra::Engines

View file

@ -25,6 +25,9 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
execution_mask.reset();
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
}
Fermi2D::~Fermi2D() = default;
@ -49,6 +52,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
}
}
void Fermi2D::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void Fermi2D::Blit() {
MICROPROFILE_SCOPE(GPU_BlitEngine);
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",

View file

@ -309,6 +309,8 @@ private:
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
void Blit();
void ConsumeSinkImpl() override;
};
#define ASSERT_REG_POSITION(field_name, position) \

View file

@ -14,7 +14,12 @@
namespace Tegra::Engines {
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
execution_mask.reset();
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
}
KeplerCompute::~KeplerCompute() = default;
@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
upload_state.BindRasterizer(rasterizer);
}
void KeplerCompute::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");

View file

@ -204,6 +204,8 @@ public:
private:
void ProcessLaunch();
void ConsumeSinkImpl() override;
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;

View file

@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
upload_state.BindRasterizer(rasterizer_);
execution_mask.reset();
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
}
void KeplerMemory::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {

View file

@ -73,6 +73,8 @@ public:
} regs{};
private:
void ConsumeSinkImpl() override;
Core::System& system;
Upload::State upload_state;
};

View file

@ -4,6 +4,7 @@
#include <cstring>
#include <optional>
#include "common/assert.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/core_timing.h"
@ -30,6 +31,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
execution_mask.reset();
for (size_t i = 0; i < execution_mask.size(); i++) {
execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
}
}
Maxwell3D::~Maxwell3D() = default;
@ -123,6 +128,71 @@ void Maxwell3D::InitializeRegisterDefaults() {
shadow_state = regs;
}
bool Maxwell3D::IsMethodExecutable(u32 method) {
if (method >= MacroRegistersStart) {
return true;
}
switch (method) {
case MAXWELL3D_REG_INDEX(draw.end):
case MAXWELL3D_REG_INDEX(draw.begin):
case MAXWELL3D_REG_INDEX(vertex_buffer.first):
case MAXWELL3D_REG_INDEX(vertex_buffer.count):
case MAXWELL3D_REG_INDEX(index_buffer.first):
case MAXWELL3D_REG_INDEX(index_buffer.count):
case MAXWELL3D_REG_INDEX(draw_inline_index):
case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer32_first):
case MAXWELL3D_REG_INDEX(index_buffer16_first):
case MAXWELL3D_REG_INDEX(index_buffer8_first):
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
case MAXWELL3D_REG_INDEX(wait_for_idle):
case MAXWELL3D_REG_INDEX(shadow_ram_control):
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
case MAXWELL3D_REG_INDEX(load_mme.instruction):
case MAXWELL3D_REG_INDEX(load_mme.start_address):
case MAXWELL3D_REG_INDEX(falcon[4]):
case MAXWELL3D_REG_INDEX(const_buffer.buffer):
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config):
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
case MAXWELL3D_REG_INDEX(topology_override):
case MAXWELL3D_REG_INDEX(clear_surface):
case MAXWELL3D_REG_INDEX(report_semaphore.query):
case MAXWELL3D_REG_INDEX(render_enable.mode):
case MAXWELL3D_REG_INDEX(clear_report_value):
case MAXWELL3D_REG_INDEX(sync_info):
case MAXWELL3D_REG_INDEX(launch_dma):
case MAXWELL3D_REG_INDEX(inline_data):
case MAXWELL3D_REG_INDEX(fragment_barrier):
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return true;
default:
return false;
}
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
@ -141,6 +211,7 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
ConsumeSink();
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
macro_addresses.clear();
@ -214,6 +285,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
return argument;
}
void Maxwell3D::ConsumeSinkImpl() {
SCOPE_EXIT({ method_sink.clear(); });
const auto control = shadow_state.shadow_ram_control;
if (control == Regs::ShadowRamControl::Track ||
control == Regs::ShadowRamControl::TrackWithFilter) {
for (auto [method, value] : method_sink) {
shadow_state.reg_array[method] = value;
ProcessDirtyRegisters(method, value);
}
return;
}
if (control == Regs::ShadowRamControl::Replay) {
for (auto [method, value] : method_sink) {
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
}
return;
}
for (auto [method, value] : method_sink) {
ProcessDirtyRegisters(method, value);
}
}
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
if (regs.reg_array[method] == argument) {
return;

View file

@ -3123,6 +3123,8 @@ private:
void ProcessDirtyRegisters(u32 method, u32 argument);
void ConsumeSinkImpl() override;
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
/// Retrieves information about a specific TIC entry from the TIC buffer.
@ -3172,6 +3174,8 @@ private:
void RefreshParametersImpl();
bool IsMethodExecutable(u32 method);
Core::System& system;
MemoryManager& memory_manager;

View file

@ -21,7 +21,10 @@ namespace Tegra::Engines {
using namespace Texture;
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_} {}
: system{system_}, memory_manager{memory_manager_} {
execution_mask.reset();
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
}
MaxwellDMA::~MaxwellDMA() = default;
@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
void MaxwellDMA::ConsumeSinkImpl() {
for (auto [method, value] : method_sink) {
regs.reg_array[method] = value;
}
method_sink.clear();
}
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");

View file

@ -231,6 +231,8 @@ private:
void ReleaseSemaphore();
void ConsumeSinkImpl() override;
Core::System& system;
MemoryManager& memory_manager;

View file

@ -127,6 +127,7 @@ private:
const u32 vertex_first = parameters[3];
const u32 vertex_count = parameters[1];
if (maxwell3d.AnyParametersDirty() &&
maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) {
ASSERT_MSG(false, "Faulty draw!");
@ -135,6 +136,7 @@ private:
const u32 base_instance = parameters[4];
if (extended) {
maxwell3d.regs.global_base_instance_index = base_instance;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
}
@ -144,6 +146,7 @@ private:
vertex_first, vertex_count, base_instance, instance_count);
if (extended) {
maxwell3d.regs.global_base_instance_index = 0;
maxwell3d.engine_state = Maxwell::EngineHint::None;
maxwell3d.replace_table.clear();
}