3
0
Fork 0
forked from suyu/suyu

Merge pull request #2429 from FernandoS27/compute

Corrections and Implementation on GPU Engines
This commit is contained in:
bunnei 2019-05-09 13:19:22 -04:00 committed by GitHub
commit c27b81cb85
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 484 additions and 143 deletions

View file

@ -3,6 +3,8 @@ add_library(video_core STATIC
dma_pusher.h dma_pusher.h
debug_utils/debug_utils.cpp debug_utils/debug_utils.cpp
debug_utils/debug_utils.h debug_utils/debug_utils.h
engines/engine_upload.cpp
engines/engine_upload.h
engines/fermi_2d.cpp engines/fermi_2d.cpp
engines/fermi_2d.h engines/fermi_2d.h
engines/kepler_compute.cpp engines/kepler_compute.cpp

View file

@ -0,0 +1,48 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines::Upload {
/// Binds the upload state to the memory manager it writes through and to the upload register
/// block of the owning engine. Both are stored as references, so they must outlive this object.
///
/// The initializer list follows the declaration order of the members (regs, then
/// memory_manager); members are always initialized in declaration order, so listing them any
/// other way only produces a -Wreorder warning and misleads the reader.
State::State(MemoryManager& memory_manager, Registers& regs)
    : regs(regs), memory_manager(memory_manager) {}
void State::ProcessExec(const bool is_linear) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
inner_buffer.resize(copy_size);
this->is_linear = is_linear;
}
/// Appends one 32-bit data word to the staging buffer and, on the last word of the submission,
/// flushes the staged data to GPU memory at regs.dest.Address().
///
/// @param data         The data word written to the engine's data register.
/// @param is_last_call True when this is the final word of the submission; only then is GPU
///                     memory written.
void State::ProcessData(const u32 data, const bool is_last_call) {
    // Take only the bytes that still fit in the transfer: a transfer whose size is not a
    // multiple of 4 uses just the leading bytes of its final word.
    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
    if (sub_copy_size != 0) {
        // Guarded so that a zero-sized upload, or surplus words after the buffer is full, never
        // index one past the end of the (possibly empty) vector or hand memcpy a pointer that
        // does not refer to storage.
        std::memcpy(inner_buffer.data() + write_offset, &data, sub_copy_size);
        write_offset += sub_copy_size;
    }
    if (!is_last_call) {
        return;
    }

    const GPUVAddr address{regs.dest.Address()};
    if (is_linear) {
        // Linear destination: the staged bytes are written out unchanged.
        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
    } else {
        // Only a single-layer destination with a block width and block depth of 1 is handled.
        UNIMPLEMENTED_IF(regs.dest.z != 0);
        UNIMPLEMENTED_IF(regs.dest.depth != 1);
        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);

        // Read-modify-write of the whole destination surface: fetch its current contents, let
        // SwizzleKepler place the staged data into them (starting at dest.x / dest.y), then
        // write the surface back.
        const std::size_t dst_size = Tegra::Texture::CalculateSize(
            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
        tmp_buffer.resize(dst_size);
        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
                                      regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
                                      tmp_buffer.data());
        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
    }
}
} // namespace Tegra::Engines::Upload

View file

@ -0,0 +1,75 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines::Upload {
/// Register block describing one inline upload. The engines embed this struct directly inside
/// their register unions and static_assert its position, so members must not be reordered,
/// resized, added or removed.
struct Registers {
// line_length_in * line_count is used as the total size, in bytes, of one upload.
u32 line_length_in;
u32 line_count;
// Description of the destination the upload is written to.
struct {
// Halves of the destination GPU virtual address; combined by Address().
u32 address_high;
u32 address_low;
u32 pitch;
// Block dimensions, each stored as a shift amount; see BlockWidth()/BlockHeight()/BlockDepth().
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
/// Returns the destination address: address_high shifted left by 32 bits, OR'ed with address_low.
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
/// Returns 1 << block_width, i.e. the register holds the log2 of the returned value.
u32 BlockWidth() const {
return 1U << block_width.Value();
}
/// Returns 1 << block_height, i.e. the register holds the log2 of the returned value.
u32 BlockHeight() const {
return 1U << block_height.Value();
}
/// Returns 1 << block_depth, i.e. the register holds the log2 of the returned value.
u32 BlockDepth() const {
return 1U << block_depth.Value();
}
} dest;
};
/// Staging state for an inline upload: ProcessExec() starts a transfer, ProcessData() collects
/// the data words and writes them to GPU memory once the last word has arrived.
class State {
public:
/// Stores references to the memory manager and to the upload register block; neither is copied.
State(MemoryManager& memory_manager, Registers& regs);
~State() = default;
/// Resets the write cursor and sizes the staging buffer from the current register values.
/// @param is_linear Whether the destination is to be written linearly (as opposed to swizzled).
void ProcessExec(const bool is_linear);
/// Appends one data word to the staging buffer; when is_last_call is true the staged data is
/// written to the destination address.
void ProcessData(const u32 data, const bool is_last_call);
private:
// Number of bytes of inner_buffer filled so far.
u32 write_offset = 0;
// Total size in bytes of the current transfer (line_length_in * line_count).
u32 copy_size = 0;
// Staging buffer holding the uploaded data until the last word arrives.
std::vector<u8> inner_buffer;
// Scratch buffer holding the destination surface during a non-linear (swizzled) write.
std::vector<u8> tmp_buffer;
// Destination layout selected by the most recent ProcessExec() call.
bool is_linear = false;
// NOTE: regs is declared before memory_manager, so it is initialized first regardless of the
// order used in the constructor's initializer list.
Registers& regs;
MemoryManager& memory_manager;
};
} // namespace Tegra::Engines::Upload

View file

@ -21,6 +21,12 @@ class RasterizerInterface;
namespace Tegra::Engines { namespace Tegra::Engines {
/**
* This Engine is known as G80_2D. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
*/
#define FERMI2D_REG_INDEX(field_name) \ #define FERMI2D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))

View file

@ -4,12 +4,21 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h"
#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines { namespace Tegra::Engines {
KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
memory_manager,
regs.upload} {}
KeplerCompute::~KeplerCompute() = default; KeplerCompute::~KeplerCompute() = default;
@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument; regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) { switch (method_call.method) {
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
break;
}
case KEPLER_COMPUTE_REG_INDEX(launch): case KEPLER_COMPUTE_REG_INDEX(launch):
// Abort execution since compute shaders can be used to alter game memory (e.g. CUDA ProcessLaunch();
// kernels)
UNREACHABLE_MSG("Compute shaders are not implemented");
break; break;
default: default:
break; break;
} }
} }
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
}
} // namespace Tegra::Engines } // namespace Tegra::Engines

View file

@ -6,22 +6,40 @@
#include <array> #include <array>
#include <cstddef> #include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
namespace Core {
class System;
}
namespace Tegra { namespace Tegra {
class MemoryManager; class MemoryManager;
} }
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines { namespace Tegra::Engines {
/**
* This Engine is known as GK104_Compute. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
*/
#define KEPLER_COMPUTE_REG_INDEX(field_name) \ #define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class KeplerCompute final { class KeplerCompute final {
public: public:
explicit KeplerCompute(MemoryManager& memory_manager); explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~KeplerCompute(); ~KeplerCompute();
static constexpr std::size_t NumConstBuffers = 8; static constexpr std::size_t NumConstBuffers = 8;
@ -31,30 +49,181 @@ public:
union { union {
struct { struct {
INSERT_PADDING_WORDS(0xAF); INSERT_PADDING_WORDS(0x60);
Upload::Registers upload;
struct {
union {
BitField<0, 1, u32> linear;
};
} exec_upload;
u32 data_upload;
INSERT_PADDING_WORDS(0x3F);
struct {
u32 address;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
}
} launch_desc_loc;
INSERT_PADDING_WORDS(0x1);
u32 launch; u32 launch;
INSERT_PADDING_WORDS(0xC48); INSERT_PADDING_WORDS(0x4A7);
struct {
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tsc;
INSERT_PADDING_WORDS(0x3);
struct {
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tic;
INSERT_PADDING_WORDS(0x22);
struct {
u32 address_high;
u32 address_low;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} code_loc;
INSERT_PADDING_WORDS(0x3FE);
u32 texture_const_buffer_index;
INSERT_PADDING_WORDS(0x374);
}; };
std::array<u32, NUM_REGS> reg_array; std::array<u32, NUM_REGS> reg_array;
}; };
} regs{}; } regs{};
/// In-memory layout of a compute launch descriptor, filled by reading
/// NUM_LAUNCH_PARAMETERS 32-bit words from GPU memory. Word offsets of the named fields and the
/// total size are checked with static_asserts, so members and padding must not be reordered or
/// resized.
struct LaunchParams {
// Size of the descriptor in 32-bit words.
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
INSERT_PADDING_WORDS(0x8);
// Word 0x8: added to code_loc's address to obtain the kernel address.
u32 program_start;
INSERT_PADDING_WORDS(0x2);
BitField<30, 1, u32> linked_tsc;
// Word 0xC.
BitField<0, 31, u32> grid_dim_x;
union {
BitField<0, 16, u32> grid_dim_y;
BitField<16, 16, u32> grid_dim_z;
};
INSERT_PADDING_WORDS(0x3);
// Word 0x11.
BitField<0, 16, u32> shared_alloc;
// Word 0x12.
BitField<0, 31, u32> block_dim_x;
union {
BitField<0, 16, u32> block_dim_y;
BitField<16, 16, u32> block_dim_z;
};
// Word 0x14.
union {
BitField<0, 8, u32> const_buffer_enable_mask;
BitField<29, 2, u32> cache_layout;
} memory_config;
INSERT_PADDING_WORDS(0x8);
// Word 0x1D: eight two-word entries, each holding a split address and a size field.
struct {
u32 address_low;
union {
BitField<0, 8, u32> address_high;
BitField<15, 17, u32> size;
};
/// Returns address_high (8 bits) shifted left by 32 bits, OR'ed with address_low.
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
address_low);
}
} const_buffer_config[8];
union {
BitField<0, 20, u32> local_pos_alloc;
BitField<27, 5, u32> barrier_alloc;
};
union {
BitField<0, 20, u32> local_neg_alloc;
BitField<24, 5, u32> gpr_alloc;
};
INSERT_PADDING_WORDS(0x11);
} launch_description;
// NOTE(review): nothing in the visible KeplerCompute code reads or writes this member; uploads
// are staged through upload_state instead. Confirm whether it is still needed.
struct {
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
} state{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size"); "KeplerCompute Regs has wrong size");
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
"KeplerCompute LaunchParams has wrong size");
/// Write the value to the register identified by method. /// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call); void CallMethod(const GPU::MethodCall& method_call);
private: private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager; MemoryManager& memory_manager;
Upload::State upload_state;
void ProcessLaunch();
}; };
#define ASSERT_REG_POSITION(field_name, position) \ #define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position") "Field " #field_name " has invalid position")
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(launch, 0xAF); ASSERT_REG_POSITION(launch, 0xAF);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_loc, 0x582);
ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
#undef ASSERT_REG_POSITION #undef ASSERT_REG_POSITION

View file

@ -14,9 +14,8 @@
namespace Tegra::Engines { namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
MemoryManager& memory_manager) : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
KeplerMemory::~KeplerMemory() = default; KeplerMemory::~KeplerMemory() = default;
@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
switch (method_call.method) { switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): { case KEPLERMEMORY_REG_INDEX(exec): {
ProcessExec(); upload_state.ProcessExec(regs.exec.linear != 0);
break; break;
} }
case KEPLERMEMORY_REG_INDEX(data): { case KEPLERMEMORY_REG_INDEX(data): {
ProcessData(method_call.argument, method_call.IsLastCall()); const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
break; break;
} }
} }
} }
void KeplerMemory::ProcessExec() {
state.write_offset = 0;
state.copy_size = regs.line_length_in * regs.line_count;
state.inner_buffer.resize(state.copy_size);
}
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
state.write_offset += sub_copy_size;
if (is_last_call) {
const GPUVAddr address{regs.dest.Address()};
if (regs.exec.linear != 0) {
memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
std::vector<u8> tmp_buffer(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
state.inner_buffer.data(), tmp_buffer.data());
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
}
} // namespace Tegra::Engines } // namespace Tegra::Engines

View file

@ -10,6 +10,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
namespace Core { namespace Core {
@ -20,19 +21,20 @@ namespace Tegra {
class MemoryManager; class MemoryManager;
} }
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines { namespace Tegra::Engines {
/**
* This Engine is known as P2MF. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
*/
#define KEPLERMEMORY_REG_INDEX(field_name) \ #define KEPLERMEMORY_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
class KeplerMemory final { class KeplerMemory final {
public: public:
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, KeplerMemory(Core::System& system, MemoryManager& memory_manager);
MemoryManager& memory_manager);
~KeplerMemory(); ~KeplerMemory();
/// Write the value to the register identified by method. /// Write the value to the register identified by method.
@ -45,42 +47,7 @@ public:
struct { struct {
INSERT_PADDING_WORDS(0x60); INSERT_PADDING_WORDS(0x60);
u32 line_length_in; Upload::Registers upload;
u32 line_count;
struct {
u32 address_high;
u32 address_low;
u32 pitch;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
u32 BlockWidth() const {
return 1U << block_width.Value();
}
u32 BlockHeight() const {
return 1U << block_height.Value();
}
u32 BlockDepth() const {
return 1U << block_depth.Value();
}
} dest;
struct { struct {
union { union {
@ -96,28 +63,17 @@ public:
}; };
} regs{}; } regs{};
struct {
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
} state{};
private: private:
Core::System& system; Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager; MemoryManager& memory_manager;
Upload::State upload_state;
void ProcessExec();
void ProcessData(u32 data, bool is_last_call);
}; };
#define ASSERT_REG_POSITION(field_name, position) \ #define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position") "Field " #field_name " has invalid position")
ASSERT_REG_POSITION(line_length_in, 0x60); ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(line_count, 0x61);
ASSERT_REG_POSITION(dest, 0x62);
ASSERT_REG_POSITION(exec, 0x6C); ASSERT_REG_POSITION(exec, 0x6C);
ASSERT_REG_POSITION(data, 0x6D); ASSERT_REG_POSITION(data, 0x6D);
#undef ASSERT_REG_POSITION #undef ASSERT_REG_POSITION

View file

@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager) MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
*this} { macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
InitializeRegisterDefaults(); InitializeRegisterDefaults();
} }
@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessSyncPoint(); ProcessSyncPoint();
break; break;
} }
case MAXWELL3D_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case MAXWELL3D_REG_INDEX(data_upload): {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
dirty_flags.OnMemoryWrite();
}
break;
}
default: default:
break; break;
} }

View file

@ -14,6 +14,7 @@
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/math_util.h" #include "common/math_util.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/macro_interpreter.h" #include "video_core/macro_interpreter.h"
#include "video_core/textures/texture.h" #include "video_core/textures/texture.h"
@ -32,6 +33,12 @@ class RasterizerInterface;
namespace Tegra::Engines { namespace Tegra::Engines {
/**
* This Engine is known as GF100_3D. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
*/
#define MAXWELL3D_REG_INDEX(field_name) \ #define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
@ -580,7 +587,18 @@ public:
u32 bind; u32 bind;
} macros; } macros;
INSERT_PADDING_WORDS(0x69); INSERT_PADDING_WORDS(0x17);
Upload::Registers upload;
struct {
union {
BitField<0, 1, u32> linear;
};
} exec_upload;
u32 data_upload;
INSERT_PADDING_WORDS(0x44);
struct { struct {
union { union {
@ -1176,6 +1194,8 @@ private:
/// Interpreter for the macro codes uploaded to the GPU. /// Interpreter for the macro codes uploaded to the GPU.
MacroInterpreter macro_interpreter; MacroInterpreter macro_interpreter;
Upload::State upload_state;
/// Retrieves information about a specific TIC entry from the TIC buffer. /// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const; Texture::TICEntry GetTICEntry(u32 tic_index) const;
@ -1219,6 +1239,9 @@ private:
"Field " #field_name " has invalid position") "Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45); ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1); ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(rt, 0x200);

View file

@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1); ASSERT(regs.exec.enable_2d == 1);
const std::size_t copy_size = regs.x_count * regs.y_count;
auto source_ptr{memory_manager.GetPointer(source)};
auto dst_ptr{memory_manager.GetPointer(dest)};
if (!source_ptr) {
LOG_ERROR(HW_GPU, "source_ptr is invalid");
return;
}
if (!dst_ptr) {
LOG_ERROR(HW_GPU, "dst_ptr is invalid");
return;
}
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.size_z == 1); ASSERT(regs.src_params.size_z == 1);
// If the input is tiled and the output is linear, deswizzle the input and copy it over. // If the input is tiled and the output is linear, deswizzle the input and copy it over.
const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
const std::size_t src_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, const std::size_t dst_size = regs.dst_pitch * regs.y_count;
copy_size * src_bytes_per_pixel);
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
regs.src_params.BlockHeight(), regs.src_params.pos_x, write_buffer.data(), regs.src_params.BlockHeight(),
regs.src_params.pos_y); regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} else { } else {
ASSERT(regs.dst_params.size_z == 1); ASSERT(regs.dst_params.BlockDepth() == 1);
ASSERT(regs.src_pitch == regs.x_count);
const u32 src_bpp = regs.src_pitch / regs.x_count; const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
FlushAndInvalidate(regs.src_pitch * regs.y_count, const std::size_t dst_size = Texture::CalculateSize(
regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t dst_layer_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t src_size = regs.src_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
// If the input is linear and the output is tiled, swizzle the input and copy it over. // If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); src_bytes_per_pixel,
write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
read_buffer.data(), regs.dst_params.BlockHeight());
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} }
} }

View file

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <cstddef> #include <cstddef>
#include <vector>
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
@ -25,6 +26,11 @@ class RasterizerInterface;
namespace Tegra::Engines { namespace Tegra::Engines {
/**
* This Engine is known as GK104_Copy. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
*/
class MaxwellDMA final { class MaxwellDMA final {
public: public:
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@ -63,6 +69,16 @@ public:
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
enum class ComponentMode : u32 {
Src0 = 0,
Src1 = 1,
Src2 = 2,
Src3 = 3,
Const0 = 4,
Const1 = 5,
Zero = 6,
};
enum class CopyMode : u32 { enum class CopyMode : u32 {
None = 0, None = 0,
Unk1 = 1, Unk1 = 1,
@ -128,7 +144,26 @@ public:
u32 x_count; u32 x_count;
u32 y_count; u32 y_count;
INSERT_PADDING_WORDS(0xBB); INSERT_PADDING_WORDS(0xB8);
u32 const0;
u32 const1;
union {
BitField<0, 4, ComponentMode> component0;
BitField<4, 4, ComponentMode> component1;
BitField<8, 4, ComponentMode> component2;
BitField<12, 4, ComponentMode> component3;
BitField<16, 2, u32> component_size;
BitField<20, 3, u32> src_num_components;
BitField<24, 3, u32> dst_num_components;
u32 SrcBytePerPixel() const {
return src_num_components.Value() * component_size.Value();
}
u32 DstBytePerPixel() const {
return dst_num_components.Value() * component_size.Value();
}
} swizzle_config;
Parameters dst_params; Parameters dst_params;
@ -149,6 +184,9 @@ private:
MemoryManager& memory_manager; MemoryManager& memory_manager;
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
/// Performs the copy from the source buffer to the destination buffer as configured in the /// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers. /// registers.
void HandleCopy(); void HandleCopy();
@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
ASSERT_REG_POSITION(dst_pitch, 0x105); ASSERT_REG_POSITION(dst_pitch, 0x105);
ASSERT_REG_POSITION(x_count, 0x106); ASSERT_REG_POSITION(x_count, 0x106);
ASSERT_REG_POSITION(y_count, 0x107); ASSERT_REG_POSITION(y_count, 0x107);
ASSERT_REG_POSITION(const0, 0x1C0);
ASSERT_REG_POSITION(const1, 0x1C1);
ASSERT_REG_POSITION(swizzle_config, 0x1C2);
ASSERT_REG_POSITION(dst_params, 0x1C3); ASSERT_REG_POSITION(dst_params, 0x1C3);
ASSERT_REG_POSITION(src_params, 0x1CA); ASSERT_REG_POSITION(src_params, 0x1CA);

View file

@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
} }
GPU::~GPU() = default; GPU::~GPU() = default;