forked from suyu/suyu
Correct Kepler Memory on Linear Pushes.
This commit is contained in:
parent
1f4dfb3998
commit
8a099ac99f
2 changed files with 48 additions and 16 deletions
|
@ -10,6 +10,8 @@
|
|||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/convert.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
|
@ -27,30 +29,40 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
|
|||
|
||||
switch (method_call.method) {
|
||||
case KEPLERMEMORY_REG_INDEX(exec): {
|
||||
state.write_offset = 0;
|
||||
ProcessExec();
|
||||
break;
|
||||
}
|
||||
case KEPLERMEMORY_REG_INDEX(data): {
|
||||
ProcessData(method_call.argument);
|
||||
ProcessData(method_call.argument, method_call.IsLastCall());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerMemory::ProcessData(u32 data) {
|
||||
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
|
||||
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
|
||||
/// Resets the upload staging state when the exec register is written.
///
/// The write cursor goes back to the start, the expected upload size is recomputed as
/// line_length_in * line_count, and the byte staging buffer is resized to hold exactly
/// that many bytes.
void KeplerMemory::ProcessExec() {
    auto& upload = state;
    upload.write_offset = 0;
    upload.copy_size = regs.line_length_in * regs.line_count;
    // The buffer is sized from the stored copy_size so both always agree.
    upload.inner_buffer.resize(upload.copy_size);
}
|
||||
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
|
||||
// We do this before actually writing the new data because the destination address might
|
||||
// contain a dirty surface that will have to be written back to memory.
|
||||
const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
|
||||
rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
|
||||
memory_manager.Write<u32>(address, data);
|
||||
/// Stages one pushed data word and, on the final word of the push, commits the staged bytes
/// to the destination address.
///
/// @param data         32-bit word received through the data register.
/// @param is_last_call True when this word is the last one of the current push; the staged
///                     buffer is written to memory only in that case.
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
    // Clamp to the bytes still expected, so a trailing partial word (copy_size not a multiple
    // of four) or surplus words never write past the end of the staging buffer.
    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
    if (sub_copy_size != 0) {
        // Only touch the buffer when there is room left: once write_offset reaches copy_size
        // (or when copy_size is zero and the buffer is empty) indexing it would be out of range.
        std::memcpy(state.inner_buffer.data() + state.write_offset, &data, sub_copy_size);
        state.write_offset += sub_copy_size;
    }

    if (!is_last_call) {
        return;
    }

    UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented");
    if (regs.exec.linear == 0) {
        return;
    }

    const GPUVAddr address{regs.dest.Address()};
    // NOTE(review): host_ptr is used unchecked below; confirm GetPointer cannot return null
    // for a destination the guest is allowed to program.
    const auto host_ptr = memory_manager.GetPointer(address);

    // We have to invalidate the destination region to evict any outdated surfaces from the
    // cache. We do this before actually writing the new data because the destination
    // address might contain a dirty surface that will have to be written back to memory.
    rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size);
    if (state.copy_size != 0) {
        // An empty staging buffer may expose a null data() pointer, which must not be handed
        // to memcpy even for a zero-length copy.
        std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size);
    }
    // Notify the 3D engine exactly once per committed upload.
    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
@ -51,7 +52,11 @@ public:
|
|||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 pitch;
|
||||
u32 block_dimensions;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
|
@ -63,6 +68,18 @@ public:
|
|||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
|
||||
/// Returns 1 shifted left by the value held in the block_width bit field,
/// i.e. two raised to that stored value.
u32 BlockWidth() const {
    const auto shift = block_width.Value();
    return 1U << shift;
}
|
||||
|
||||
/// Returns 1 shifted left by the value held in the block_height bit field,
/// i.e. two raised to that stored value.
u32 BlockHeight() const {
    const auto shift = block_height.Value();
    return 1U << shift;
}
|
||||
|
||||
/// Returns 1 shifted left by the value held in the block_depth bit field,
/// i.e. two raised to that stored value.
u32 BlockDepth() const {
    const auto shift = block_depth.Value();
    return 1U << shift;
}
|
||||
} dest;
|
||||
|
||||
struct {
|
||||
|
@ -81,6 +98,8 @@ public:
|
|||
|
||||
struct {
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
} state{};
|
||||
|
||||
private:
|
||||
|
@ -88,7 +107,8 @@ private:
|
|||
VideoCore::RasterizerInterface& rasterizer;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
void ProcessData(u32 data);
|
||||
void ProcessExec();
|
||||
void ProcessData(u32 data, bool is_last_call);
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
|
Loading…
Reference in a new issue