From a91d3fc6397560fc6294a24faeed73d45abd1753 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 22 Apr 2019 18:50:56 -0400
Subject: [PATCH 1/8] Revamp Kepler Memory to use a subegine to manage uploads

---
 src/video_core/CMakeLists.txt            |  2 +
 src/video_core/engines/engine_upload.cpp | 44 ++++++++++++++
 src/video_core/engines/engine_upload.h   | 74 ++++++++++++++++++++++++
 src/video_core/engines/kepler_memory.cpp | 45 +++-----------
 src/video_core/engines/kepler_memory.h   | 60 ++-----------------
 src/video_core/gpu.cpp                   |  2 +-
 6 files changed, 134 insertions(+), 93 deletions(-)
 create mode 100644 src/video_core/engines/engine_upload.cpp
 create mode 100644 src/video_core/engines/engine_upload.h
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 1e31a29006..7d02ec2d05 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
     dma_pusher.h
     debug_utils/debug_utils.cpp
     debug_utils/debug_utils.h
+    engines/engine_upload.cpp
+    engines/engine_upload.h
     engines/fermi_2d.cpp
     engines/fermi_2d.h
     engines/kepler_compute.cpp
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
new file mode 100644
index 0000000000..201f8e273e
--- /dev/null
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,44 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines::Upload {
+
+void State::ProcessExec(const bool is_linear) {
+    write_offset = 0;
+    copy_size = regs.line_length_in * regs.line_count;
+    inner_buffer.resize(copy_size);
+    linear = is_linear;
+}
+
+void State::ProcessData(const u32 data, const bool is_last_call) {
+    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
+    std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
+    write_offset += sub_copy_size;
+    if (is_last_call) {
+        const GPUVAddr address{regs.dest.Address()};
+        if (linear) {
+            memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+        } else {
+            UNIMPLEMENTED_IF(regs.dest.z != 0);
+            UNIMPLEMENTED_IF(regs.dest.depth != 1);
+            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+            const std::size_t dst_size = Tegra::Texture::CalculateSize(
+                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+            std::vector<u8> tmp_buffer(dst_size);
+            memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
+                                          regs.dest.y, regs.dest.BlockHeight(), copy_size,
+                                          inner_buffer.data(), tmp_buffer.data());
+            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+        }
+    }
+}
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
new file mode 100644
index 0000000000..3a817140ab
--- /dev/null
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,74 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines::Upload {
+
+struct Data {
+    u32 line_length_in;
+    u32 line_count;
+
+    struct {
+        u32 address_high;
+        u32 address_low;
+        u32 pitch;
+        union {
+            BitField<0, 4, u32> block_width;
+            BitField<4, 4, u32> block_height;
+            BitField<8, 4, u32> block_depth;
+        };
+        u32 width;
+        u32 height;
+        u32 depth;
+        u32 z;
+        u32 x;
+        u32 y;
+
+        GPUVAddr Address() const {
+            return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+        }
+
+        u32 BlockWidth() const {
+            return 1U << block_width.Value();
+        }
+
+        u32 BlockHeight() const {
+            return 1U << block_height.Value();
+        }
+
+        u32 BlockDepth() const {
+            return 1U << block_depth.Value();
+        }
+    } dest;
+};
+
+class State {
+public:
+    State(MemoryManager& memory_manager, Data& regs) : memory_manager(memory_manager), regs(regs) {}
+    ~State() = default;
+
+    void ProcessExec(const bool is_linear);
+    void ProcessData(const u32 data, const bool is_last_call);
+
+private:
+    u32 write_offset = 0;
+    u32 copy_size = 0;
+    std::vector<u8> inner_buffer;
+    bool linear;
+    Data& regs;
+    MemoryManager& memory_manager;
+};
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 7387886a3f..71fa499d3e 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@
 
 namespace Tegra::Engines {
 
-KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                           MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
+    : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
 
 KeplerMemory::~KeplerMemory() = default;
 
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
 
     switch (method_call.method) {
     case KEPLERMEMORY_REG_INDEX(exec): {
-        ProcessExec();
+        upload_state.ProcessExec(regs.exec.linear != 0);
         break;
     }
     case KEPLERMEMORY_REG_INDEX(data): {
-        ProcessData(method_call.argument, method_call.IsLastCall());
+        bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
         break;
     }
     }
 }
 
-void KeplerMemory::ProcessExec() {
-    state.write_offset = 0;
-    state.copy_size = regs.line_length_in * regs.line_count;
-    state.inner_buffer.resize(state.copy_size);
-}
-
-void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
-    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
-    std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
-    state.write_offset += sub_copy_size;
-    if (is_last_call) {
-        const GPUVAddr address{regs.dest.Address()};
-        if (regs.exec.linear != 0) {
-            memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
-        } else {
-            UNIMPLEMENTED_IF(regs.dest.z != 0);
-            UNIMPLEMENTED_IF(regs.dest.depth != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
-            const std::size_t dst_size = Tegra::Texture::CalculateSize(
-                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
-            std::vector<u8> tmp_buffer(dst_size);
-            memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
-                                          regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
-                                          state.inner_buffer.data(), tmp_buffer.data());
-            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
-        }
-        system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
-    }
-}
-
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5f892ddad1..c6b738eb95 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 
 namespace Core {
@@ -20,10 +21,6 @@ namespace Tegra {
 class MemoryManager;
 }
 
-namespace VideoCore {
-class RasterizerInterface;
-}
-
 namespace Tegra::Engines {
 
 #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
@@ -31,8 +28,7 @@ namespace Tegra::Engines {
 
 class KeplerMemory final {
 public:
-    KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                 MemoryManager& memory_manager);
+    KeplerMemory(Core::System& system, MemoryManager& memory_manager);
     ~KeplerMemory();
 
     /// Write the value to the register identified by method.
@@ -45,42 +41,7 @@ public:
             struct {
                 INSERT_PADDING_WORDS(0x60);
 
-                u32 line_length_in;
-                u32 line_count;
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    u32 pitch;
-                    union {
-                        BitField<0, 4, u32> block_width;
-                        BitField<4, 4, u32> block_height;
-                        BitField<8, 4, u32> block_depth;
-                    };
-                    u32 width;
-                    u32 height;
-                    u32 depth;
-                    u32 z;
-                    u32 x;
-                    u32 y;
-
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-
-                    u32 BlockWidth() const {
-                        return 1U << block_width.Value();
-                    }
-
-                    u32 BlockHeight() const {
-                        return 1U << block_height.Value();
-                    }
-
-                    u32 BlockDepth() const {
-                        return 1U << block_depth.Value();
-                    }
-                } dest;
+                Upload::Data upload;
 
                 struct {
                     union {
@@ -96,28 +57,17 @@ public:
         };
     } regs{};
 
-    struct {
-        u32 write_offset = 0;
-        u32 copy_size = 0;
-        std::vector<u8> inner_buffer;
-    } state{};
-
 private:
     Core::System& system;
-    VideoCore::RasterizerInterface& rasterizer;
     MemoryManager& memory_manager;
-
-    void ProcessExec();
-    void ProcessData(u32 data, bool is_last_call);
+    Upload::State upload_state;
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
     static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
                   "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(line_length_in, 0x60);
-ASSERT_REG_POSITION(line_count, 0x61);
-ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(upload, 0x60);
 ASSERT_REG_POSITION(exec, 0x6C);
 ASSERT_REG_POSITION(data, 0x6D);
 #undef ASSERT_REG_POSITION
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4461083ffe..9db6e4763c 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -37,7 +37,7 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
     fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
     kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
 }
 
 GPU::~GPU() = default;

From e4ff140b99339589d87836f865fc437719adbbe9 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 22 Apr 2019 19:05:43 -0400
Subject: [PATCH 2/8] Introduce skeleton of the GPU Compute Engine.

---
 src/video_core/engines/kepler_compute.cpp |  37 ++++-
 src/video_core/engines/kepler_compute.h   | 171 +++++++++++++++++++++-
 src/video_core/gpu.cpp                    |   2 +-
 3 files changed, 202 insertions(+), 8 deletions(-)

diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index b1d9504608..28f1f6a7d2 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
-KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                             MemoryManager& memory_manager)
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
+                                                                                  memory_manager,
+                                                                                  regs.upload} {}
 
 KeplerCompute::~KeplerCompute() = default;
 
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
     regs.reg_array[method_call.method] = method_call.argument;
 
     switch (method_call.method) {
+    case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case KEPLER_COMPUTE_REG_INDEX(data_upload): {
+        bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
     case KEPLER_COMPUTE_REG_INDEX(launch):
-        // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
-        // kernels)
-        UNREACHABLE_MSG("Compute shaders are not implemented");
+        ProcessLaunch();
         break;
     default:
         break;
     }
 }
 
+void KeplerCompute::ProcessLaunch() {
+
+    const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
+    memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
+                                   LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
+
+    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
+    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fb6cdf4329..ab2781b4b4 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,14 +6,25 @@
 
 #include <array>
 #include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 
+namespace Core {
+class System;
+}
+
 namespace Tegra {
 class MemoryManager;
 }
 
+namespace VideoCore {
+class RasterizerInterface;
+}
+
 namespace Tegra::Engines {
 
 #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
@@ -21,7 +32,8 @@ namespace Tegra::Engines {
 
 class KeplerCompute final {
 public:
-    explicit KeplerCompute(MemoryManager& memory_manager);
+    explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                           MemoryManager& memory_manager);
     ~KeplerCompute();
 
     static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +43,183 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0xAF);
+                INSERT_PADDING_WORDS(0x60);
+
+                Upload::Data upload;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x3F);
+
+                struct {
+                    u32 address;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+                    }
+                } launch_desc_loc;
+
+                INSERT_PADDING_WORDS(0x1);
 
                 u32 launch;
 
-                INSERT_PADDING_WORDS(0xC48);
+                INSERT_PADDING_WORDS(0x4A7);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tsc;
+
+                INSERT_PADDING_WORDS(0x3);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tic;
+
+                INSERT_PADDING_WORDS(0x22);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } code_loc;
+
+                INSERT_PADDING_WORDS(0x3FE);
+
+                u32 texture_const_buffer_index;
+
+                INSERT_PADDING_WORDS(0x374);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
     } regs{};
+
+    struct LaunchParams {
+        static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        u32 program_start;
+
+        INSERT_PADDING_WORDS(0x2);
+
+        BitField<30, 1, u32> linked_tsc;
+
+        BitField<0, 31, u32> grid_dim_x;
+
+        union {
+            BitField<0, 16, u32> grid_dim_y;
+            BitField<16, 16, u32> grid_dim_z;
+        };
+
+        INSERT_PADDING_WORDS(0x3);
+
+        BitField<0, 16, u32> shared_alloc;
+
+        BitField<0, 31, u32> block_dim_x;
+
+        union {
+            BitField<0, 16, u32> block_dim_y;
+            BitField<16, 16, u32> block_dim_z;
+        };
+
+        union {
+            BitField<0, 8, u32> const_buffer_enable_mask;
+            BitField<29, 2, u32> cache_layout;
+        } memory_config;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        struct {
+            u32 address_low;
+            union {
+                BitField<0, 8, u32> address_high;
+                BitField<15, 17, u32> size;
+            };
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
+                                             address_low);
+            }
+        } const_buffer_config[8];
+
+        union {
+            BitField<0, 20, u32> local_pos_alloc;
+            BitField<27, 5, u32> barrier_alloc;
+        };
+
+        union {
+            BitField<0, 20, u32> local_neg_alloc;
+            BitField<24, 5, u32> gpr_alloc;
+        };
+
+        INSERT_PADDING_WORDS(0x11);
+    } launch_description;
+
+    struct {
+        u32 write_offset = 0;
+        u32 copy_size = 0;
+        std::vector<u8> inner_buffer;
+    } state{};
+
     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
                   "KeplerCompute Regs has wrong size");
 
+    static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
+                  "KeplerCompute LaunchParams has wrong size");
+
     /// Write the value to the register identified by method.
     void CallMethod(const GPU::MethodCall& method_call);
 
 private:
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
     MemoryManager& memory_manager;
+    Upload::State upload_state;
+
+    void ProcessLaunch();
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
     static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \
                   "Field " #field_name " has invalid position")
 
+#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \
+    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4,               \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(launch, 0xAF);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_loc, 0x582);
+ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
+ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
+ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
+ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
+ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
+ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
 
 #undef ASSERT_REG_POSITION
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 9db6e4763c..52706505b0 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,7 +35,7 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
     maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
     fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
-    kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
 }

From 701ce1c9d03c6c6a70965cbba3e961ab06abf49c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 22 Apr 2019 19:27:36 -0400
Subject: [PATCH 3/8] Implement Maxwell3D Data Upload

---
 src/video_core/engines/maxwell_3d.cpp | 16 ++++++++++++++--
 src/video_core/engines/maxwell_3d.h   | 19 ++++++++++++++++++-
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9780417f20..78810dbbb5 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
 
 Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                      MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
-                                                                                  *this} {
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
+      macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
     InitializeRegisterDefaults();
 }
 
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         ProcessSyncPoint();
         break;
     }
+    case MAXWELL3D_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case MAXWELL3D_REG_INDEX(data_upload): {
+        bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
     default:
         break;
     }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index cc2424d389..47fe1f1375 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -14,6 +14,7 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
 #include "video_core/textures/texture.h"
@@ -579,7 +580,18 @@ public:
                     u32 bind;
                 } macros;
 
-                INSERT_PADDING_WORDS(0x69);
+                INSERT_PADDING_WORDS(0x17);
+
+                Upload::Data upload;
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x44);
 
                 struct {
                     union {
@@ -1175,6 +1187,8 @@ private:
     /// Interpreter for the macro codes uploaded to the GPU.
     MacroInterpreter macro_interpreter;
 
+    Upload::State upload_state;
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
@@ -1218,6 +1232,9 @@ private:
                   "Field " #field_name " has invalid position")
 
 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);

From 021d28c9b81d86d52b989b4cc6828f33c6a63c9b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 23 Apr 2019 08:02:24 -0400
Subject: [PATCH 4/8] Corrections and styling

---
 src/video_core/engines/engine_upload.cpp  | 5 ++++-
 src/video_core/engines/engine_upload.h    | 4 ++--
 src/video_core/engines/kepler_compute.cpp | 2 +-
 src/video_core/engines/kepler_memory.cpp  | 2 +-
 src/video_core/engines/maxwell_3d.cpp     | 2 +-
 5 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 201f8e273e..663d5517a0 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -9,11 +9,14 @@
 
 namespace Tegra::Engines::Upload {
 
+State::State(MemoryManager& memory_manager, Data& regs)
+    : memory_manager(memory_manager), regs(regs) {}
+
 void State::ProcessExec(const bool is_linear) {
     write_offset = 0;
     copy_size = regs.line_length_in * regs.line_count;
     inner_buffer.resize(copy_size);
-    linear = is_linear;
+    this->is_linear = is_linear;
 }
 
 void State::ProcessData(const u32 data, const bool is_last_call) {
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 3a817140ab..431f56030d 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -56,7 +56,7 @@ struct Data {
 
 class State {
 public:
-    State(MemoryManager& memory_manager, Data& regs) : memory_manager(memory_manager), regs(regs) {}
+    State(MemoryManager& memory_manager, Data& regs);
     ~State() = default;
 
     void ProcessExec(const bool is_linear);
@@ -66,7 +66,7 @@ private:
     u32 write_offset = 0;
     u32 copy_size = 0;
     std::vector<u8> inner_buffer;
-    bool linear;
+    bool is_linear;
     Data& regs;
     MemoryManager& memory_manager;
 };
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 28f1f6a7d2..7404a8163c 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -34,7 +34,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
         break;
     }
     case KEPLER_COMPUTE_REG_INDEX(data_upload): {
-        bool is_last_call = method_call.IsLastCall();
+        const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
             system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 71fa499d3e..0561f676cb 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -31,7 +31,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
         break;
     }
     case KEPLERMEMORY_REG_INDEX(data): {
-        bool is_last_call = method_call.IsLastCall();
+        const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
             system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 78810dbbb5..d7b586db93 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -258,7 +258,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         break;
     }
     case MAXWELL3D_REG_INDEX(data_upload): {
-        bool is_last_call = method_call.IsLastCall();
+        const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
             dirty_flags.OnMemoryWrite();

From e140e2ebc65a7ad0a25ba979141e80e9376f5eff Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 23 Apr 2019 08:44:52 -0400
Subject: [PATCH 5/8] Add Documentation Headers to all the GPU Engines

---
 src/video_core/engines/fermi_2d.h       | 6 ++++++
 src/video_core/engines/kepler_compute.h | 6 ++++++
 src/video_core/engines/kepler_memory.h  | 6 ++++++
 src/video_core/engines/maxwell_3d.h     | 6 ++++++
 src/video_core/engines/maxwell_dma.h    | 5 +++++
 5 files changed, 29 insertions(+)

diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 2e51b7f13d..45f59a4d99 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as G80_2D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+ */
+
 #define FERMI2D_REG_INDEX(field_name)                                                              \
     (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
 
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index ab2781b4b4..50f3188487 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -27,6 +27,12 @@ class RasterizerInterface;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as GK104_Compute. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
+ */
+
 #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
 
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index c6b738eb95..473bff20ae 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -23,6 +23,12 @@ class MemoryManager;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as P2MF. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
+ */
+
 #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
     (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
 
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 47fe1f1375..eae4274120 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -33,6 +33,12 @@ class RasterizerInterface;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as GF100_3D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+ */
+
 #define MAXWELL3D_REG_INDEX(field_name)                                                            \
     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
 
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c6b6498420..35b25e6b9f 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -25,6 +25,11 @@ class RasterizerInterface;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as GK104_Copy. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
+ */
+
 class MaxwellDMA final {
 public:
     explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,

From f1e5314f1a57b09bd45e0db3ca91449e761c431a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 23 Apr 2019 11:21:00 -0400
Subject: [PATCH 6/8] Add Swizzle Parameters to the DMA engine

---
 src/video_core/engines/engine_upload.cpp |  2 +-
 src/video_core/engines/maxwell_dma.h     | 27 +++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 663d5517a0..867457f256 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -25,7 +25,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
     write_offset += sub_copy_size;
     if (is_last_call) {
         const GPUVAddr address{regs.dest.Address()};
-        if (linear) {
+        if (is_linear) {
             memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
         } else {
             UNIMPLEMENTED_IF(regs.dest.z != 0);
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 35b25e6b9f..0b2e261996 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -68,6 +68,16 @@ public:
 
         static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
 
+        enum class ComponentMode : u32 {
+            SRC0 = 0,
+            SRC1 = 1,
+            SRC2 = 2,
+            SRC3 = 3,
+            CONST0 = 4,
+            CONST1 = 5,
+            ZERO = 6,
+        };
+
         enum class CopyMode : u32 {
             None = 0,
             Unk1 = 1,
@@ -133,7 +143,19 @@ public:
                 u32 x_count;
                 u32 y_count;
 
-                INSERT_PADDING_WORDS(0xBB);
+                INSERT_PADDING_WORDS(0xB8);
+
+                u32 const0;
+                u32 const1;
+                union {
+                    BitField<0, 4, ComponentMode> component0;
+                    BitField<4, 4, ComponentMode> component1;
+                    BitField<8, 4, ComponentMode> component2;
+                    BitField<12, 4, ComponentMode> component3;
+                    BitField<16, 2, u32> component_size;
+                    BitField<20, 3, u32> src_num_components;
+                    BitField<24, 3, u32> dst_num_components;
+                } swizzle_config;
 
                 Parameters dst_params;
 
@@ -170,6 +192,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
 ASSERT_REG_POSITION(dst_pitch, 0x105);
 ASSERT_REG_POSITION(x_count, 0x106);
 ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(const0, 0x1C0);
+ASSERT_REG_POSITION(const1, 0x1C1);
+ASSERT_REG_POSITION(swizzle_config, 0x1C2);
 ASSERT_REG_POSITION(dst_params, 0x1C3);
 ASSERT_REG_POSITION(src_params, 0x1CA);
 

From b3118ee316863e1f4a35548f69bc1194bb740627 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 23 Apr 2019 12:41:55 -0400
Subject: [PATCH 7/8] Fixes and Corrections to DMA Engine

---
 src/video_core/engines/maxwell_dma.cpp | 85 ++++++++++++++------------
 src/video_core/engines/maxwell_dma.h   | 11 ++++
 2 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2426d00678..3a5dfef0c6 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
 
     ASSERT(regs.exec.enable_2d == 1);
 
-    const std::size_t copy_size = regs.x_count * regs.y_count;
-
-    auto source_ptr{memory_manager.GetPointer(source)};
-    auto dst_ptr{memory_manager.GetPointer(dest)};
-
-    if (!source_ptr) {
-        LOG_ERROR(HW_GPU, "source_ptr is invalid");
-        return;
-    }
-
-    if (!dst_ptr) {
-        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
-        return;
-    }
-
-    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
-        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
-        // copying.
-        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
-
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
-    };
-
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
         ASSERT(regs.src_params.size_z == 1);
         // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-
         const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+        const std::size_t src_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+            regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
 
-        FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
-                           copy_size * src_bytes_per_pixel);
+        const std::size_t dst_size = regs.dst_pitch * regs.y_count;
+
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
+
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
+
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 
         Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
-                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
-                                  regs.src_params.pos_y);
+                                  regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
+                                  write_buffer.data(), regs.src_params.BlockHeight(),
+                                  regs.src_params.pos_x, regs.src_params.pos_y);
+
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
     } else {
-        ASSERT(regs.dst_params.size_z == 1);
-        ASSERT(regs.src_pitch == regs.x_count);
+        ASSERT(regs.dst_params.BlockDepth() == 1);
 
-        const u32 src_bpp = regs.src_pitch / regs.x_count;
+        const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
 
-        FlushAndInvalidate(regs.src_pitch * regs.y_count,
-                           regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
+        const std::size_t dst_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+            regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+
+        const std::size_t dst_layer_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+            regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+
+        const std::size_t src_size = regs.src_pitch * regs.y_count;
+
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
+
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
+
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 
         // If the input is linear and the output is tiled, swizzle the input and copy it over.
         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
+                                src_bytes_per_pixel,
+                                write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
+                                read_buffer.data(), regs.dst_params.BlockHeight());
+
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
     }
 }
 
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 0b2e261996..8eab1332e4 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <vector>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -155,6 +156,13 @@ public:
                     BitField<16, 2, u32> component_size;
                     BitField<20, 3, u32> src_num_components;
                     BitField<24, 3, u32> dst_num_components;
+
+                    u32 SrcBytePerPixel() const {
+                        return src_num_components.Value() * component_size.Value();
+                    }
+                    u32 DstBytePerPixel() const {
+                        return dst_num_components.Value() * component_size.Value();
+                    }
                 } swizzle_config;
 
                 Parameters dst_params;
@@ -176,6 +184,9 @@ private:
 
     MemoryManager& memory_manager;
 
+    std::vector<u8> read_buffer;
+    std::vector<u8> write_buffer;
+
     /// Performs the copy from the source buffer to the destination buffer as configured in the
     /// registers.
     void HandleCopy();

From e64c41efe88e8f88014fef912b06b71b3df17e85 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 25 Apr 2019 12:57:10 -0400
Subject: [PATCH 8/8] Refactors and name corrections.

---
 src/video_core/engines/engine_upload.cpp | 39 ++++++++++++------------
 src/video_core/engines/engine_upload.h   |  9 +++---
 src/video_core/engines/kepler_compute.h  |  4 +--
 src/video_core/engines/kepler_memory.h   |  2 +-
 src/video_core/engines/maxwell_3d.h      |  2 +-
 src/video_core/engines/maxwell_dma.h     | 14 ++++-----
 6 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 867457f256..f8aa4ff55d 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -9,7 +9,7 @@
 
 namespace Tegra::Engines::Upload {
 
-State::State(MemoryManager& memory_manager, Data& regs)
+State::State(MemoryManager& memory_manager, Registers& regs)
     : memory_manager(memory_manager), regs(regs) {}
 
 void State::ProcessExec(const bool is_linear) {
@@ -23,24 +23,25 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
     const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
     std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
     write_offset += sub_copy_size;
-    if (is_last_call) {
-        const GPUVAddr address{regs.dest.Address()};
-        if (is_linear) {
-            memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
-        } else {
-            UNIMPLEMENTED_IF(regs.dest.z != 0);
-            UNIMPLEMENTED_IF(regs.dest.depth != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
-            const std::size_t dst_size = Tegra::Texture::CalculateSize(
-                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
-            std::vector<u8> tmp_buffer(dst_size);
-            memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
-                                          regs.dest.y, regs.dest.BlockHeight(), copy_size,
-                                          inner_buffer.data(), tmp_buffer.data());
-            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
-        }
+    if (!is_last_call) {
+        return;
+    }
+    const GPUVAddr address{regs.dest.Address()};
+    if (is_linear) {
+        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+    } else {
+        UNIMPLEMENTED_IF(regs.dest.z != 0);
+        UNIMPLEMENTED_IF(regs.dest.depth != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+        const std::size_t dst_size = Tegra::Texture::CalculateSize(
+            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+        tmp_buffer.resize(dst_size);
+        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
+                                      regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
+                                      tmp_buffer.data());
+        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
     }
 }
 
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 431f56030d..9c6e0d21ca 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -16,7 +16,7 @@ class MemoryManager;
 
 namespace Tegra::Engines::Upload {
 
-struct Data {
+struct Registers {
     u32 line_length_in;
     u32 line_count;
 
@@ -56,7 +56,7 @@ struct Data {
 
 class State {
 public:
-    State(MemoryManager& memory_manager, Data& regs);
+    State(MemoryManager& memory_manager, Registers& regs);
     ~State() = default;
 
     void ProcessExec(const bool is_linear);
@@ -66,8 +66,9 @@ private:
     u32 write_offset = 0;
     u32 copy_size = 0;
     std::vector<u8> inner_buffer;
-    bool is_linear;
-    Data& regs;
+    std::vector<u8> tmp_buffer;
+    bool is_linear = false;
+    Registers& regs;
     MemoryManager& memory_manager;
 };
 
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 50f3188487..5250b8d9ba 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -51,7 +51,7 @@ public:
             struct {
                 INSERT_PADDING_WORDS(0x60);
 
-                Upload::Data upload;
+                Upload::Registers upload;
 
                 struct {
                     union {
@@ -131,7 +131,6 @@ public:
         BitField<30, 1, u32> linked_tsc;
 
         BitField<0, 31, u32> grid_dim_x;
-
         union {
             BitField<0, 16, u32> grid_dim_y;
             BitField<16, 16, u32> grid_dim_z;
@@ -142,7 +141,6 @@ public:
         BitField<0, 16, u32> shared_alloc;
 
         BitField<0, 31, u32> block_dim_x;
-
         union {
             BitField<0, 16, u32> block_dim_y;
             BitField<16, 16, u32> block_dim_z;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 473bff20ae..f3bc675a9d 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -47,7 +47,7 @@ public:
             struct {
                 INSERT_PADDING_WORDS(0x60);
 
-                Upload::Data upload;
+                Upload::Registers upload;
 
                 struct {
                     union {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index eae4274120..8897235354 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -588,7 +588,7 @@ public:
 
                 INSERT_PADDING_WORDS(0x17);
 
-                Upload::Data upload;
+                Upload::Registers upload;
                 struct {
                     union {
                         BitField<0, 1, u32> linear;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 8eab1332e4..e5942f671c 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -70,13 +70,13 @@ public:
         static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
 
         enum class ComponentMode : u32 {
-            SRC0 = 0,
-            SRC1 = 1,
-            SRC2 = 2,
-            SRC3 = 3,
-            CONST0 = 4,
-            CONST1 = 5,
-            ZERO = 6,
+            Src0 = 0,
+            Src1 = 1,
+            Src2 = 2,
+            Src3 = 3,
+            Const0 = 4,
+            Const1 = 5,
+            Zero = 6,
         };
 
         enum class CopyMode : u32 {