Extra nvdrv support (#162)

* FinishInitalize needed for 3.0.1+ games * nvdrv:s and nvdrv:t both use NVDRV * Most settings return 0 on hardware, disabled NV_MEMORY_PROFILER for now. NVN_THROUGH_OPENGL & NVRM_GPU_PREVENT_USE are a few interesting settings to look at. Carefully choosing settings can help with drawing graphics later on * Initial /dev/nvhost-gpu support * ZCullBind * Stubbed SetErrorNotifier * Fixed SetErrorNotifier log, Added SetChannelPriority * Allocate GPFIFO Ex2, Allocate Obj Ctx, Submit GPFIFO * oops * Fixed up naming/structs/enums. Used vector instead of array for "gpfifo_entry" * Added missing fixes * /dev/nvhost-ctrl-gpu * unneeded struct * Forgot u32 in enum class * Automatic descriptor swapping for ioctls, fixed nvgpu_gpu_get_tpc_masks_args being incorrect size * nvdrv#QueryEvent * Renamed logs for nvdrv * Refactor ioctl so nv_result isn't needed * /dev/nvhost-as-gpu * Fixed Log service naming, CtxObjects now u32, renamed all structs, added static_asserts to structs, used INSERT_PADDING_WORDS instead of u32s * nvdevices now uses "Ioctl" union, * IoctlGpfifoEntry now uses bit field * final changes
2018-02-05 18:19:31 -08:00 · 2018-02-05 18:19:31 -08:00 · d129905a66
commit d129905a66
parent 294b2b2c17
17 changed files with 765 additions and 37 deletions
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@ -129,6 +129,10 @@ add_library(core STATIC
    hle/service/nvdrv/devices/nvhost_as_gpu.h
    hle/service/nvdrv/devices/nvhost_ctrl.cpp
    hle/service/nvdrv/devices/nvhost_ctrl.h
+    hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+    hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+    hle/service/nvdrv/devices/nvhost_gpu.cpp
+    hle/service/nvdrv/devices/nvhost_gpu.h
    hle/service/nvdrv/devices/nvmap.cpp
    hle/service/nvdrv/devices/nvmap.h
    hle/service/nvdrv/interface.cpp
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@ -5,7 +5,9 @@
 #pragma once

 #include <vector>
+#include "common/bit_field.h"
 #include "common/common_types.h"
+#include "common/swap.h"

 namespace Service {
 namespace Nvidia {
@ -17,6 +19,14 @@ class nvdevice {
 public:
    nvdevice() = default;
    virtual ~nvdevice() = default;
+    union Ioctl { // Decoded view of a 32-bit ioctl command word; every field aliases `raw`.
+        u32_le raw; // The complete command word.
+        BitField<0, 8, u32_le> cmd; // Bits 0-7: command number.
+        BitField<8, 8, u32_le> group; // Bits 8-15: group byte of the command.
+        BitField<16, 14, u32_le> length; // Bits 16-29: presumably the argument size in bytes.
+        BitField<30, 1, u32_le> is_in; // Bit 30: direction flag (name suggests "has input").
+        BitField<31, 1, u32_le> is_out; // Bit 31: direction flag (name suggests "has output").
+    };

    /**
     * Handles an ioctl request.
@ -25,7 +35,7 @@ public:
     * @param output A buffer where the output data will be written to.
     * @returns The result code of the ioctl.
     */
-    virtual u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) = 0;
+    virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0;
 };

 } // namespace Devices
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@ -14,7 +14,7 @@ namespace Service {
 namespace Nvidia {
 namespace Devices {

-u32 nvdisp_disp0::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
    UNIMPLEMENTED();
    return 0;
 }
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@ -20,7 +20,7 @@ public:
    nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvdevice(), nvmap_dev(std::move(nvmap_dev)) {}
    ~nvdisp_disp0() = default;

-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

    /// Performs a screen flip, drawing the buffer pointed to by the handle.
    void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride);
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@ -10,8 +10,82 @@ namespace Service {
 namespace Nvidia {
 namespace Devices {

-u32 nvhost_as_gpu::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
-    UNIMPLEMENTED();
+/// Routes an ioctl to the matching handler by comparing the raw command word against
+/// IoctlCommand. Commands with no handler fall through and report 0.
+u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    // Pass the raw command word rather than the Ioctl union through the printf-style varargs,
+    // and print the size_t buffer sizes with %zx.
+    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%zx, output_size=0x%zx",
+              static_cast<u32>(command.raw), input.size(), output.size());
+
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocInitalizeExCommand:
+        return InitalizeEx(input, output);
+    case IoctlCommand::IocAllocateSpaceCommand:
+        return AllocateSpace(input, output);
+    case IoctlCommand::IocMapBufferExCommand:
+        return MapBufferEx(input, output);
+    case IoctlCommand::IocBindChannelCommand:
+        return BindChannel(input, output);
+    case IoctlCommand::IocGetVaRegionsCommand:
+        return GetVARegions(input, output);
+    }
+    return 0;
+}
+
+/// (STUBBED) Logs the requested big page size and echoes the parameters back unchanged.
+/// Always returns 0.
+u32 nvhost_as_gpu::InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlInitalizeEx params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x%x", params.big_page_size);
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Logs the request and returns a placeholder offset (0xdeadbeef) instead of
+/// allocating anything. Always returns 0.
+u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlAllocSpace params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, pages=%x, page_size=%x, flags=%x", params.pages,
+                params.page_size, params.flags);
+    params.offset = 0xdeadbeef; // TODO(ogniK): Actually allocate space and give a real offset
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Logs the mapping request and returns offset 0 instead of mapping anything.
+/// Always returns 0.
+u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlMapBufferEx params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+
+    LOG_WARNING(Service_NVDRV,
+                "(STUBBED) called, flags=%x, nvmap_handle=%x, buffer_offset=%lx, mapping_size=%lx, "
+                "offset=%lx",
+                params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
+                params.offset);
+    params.offset = 0x0; // TODO(ogniK): Actually map and give a real offset
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// Remembers the fd passed by the caller in `channel` and echoes the parameters back.
+/// Always returns 0.
+u32 nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlBindChannel params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_DEBUG(Service_NVDRV, "called, fd=%x", params.fd);
+    channel = params.fd;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Reports two hard-coded VA regions and a buf_size of 0x30. Always returns 0.
+u32 nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlGetVaRegions params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr=%lx, buf_size=%x", params.buf_addr,
+                params.buf_size);
+
+    params.buf_size = 0x30;
+    params.regions[0].offset = 0x04000000;
+    params.regions[0].page_size = 0x1000;
+    params.regions[0].pages = 0x3fbfff;
+
+    params.regions[1].offset = 0x04000000;
+    params.regions[1].page_size = 0x10000;
+    params.regions[1].pages = 0x1bffff;
+    // TODO(ogniK): This probably can stay stubbed but should add support way way later
+    std::memcpy(output.data(), &params, out_size);
    return 0;
 }

--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@ -6,6 +6,7 @@

 #include <vector>
 #include "common/common_types.h"
+#include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"

 namespace Service {
@ -17,7 +18,80 @@ public:
    nvhost_as_gpu() = default;
    ~nvhost_as_gpu() override = default;

-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
+
+private:
+    enum class IoctlCommand : u32_le { // Raw command words that ioctl() switches on.
+        IocInitalizeExCommand = 0x40284109, // dispatched to InitalizeEx()
+        IocAllocateSpaceCommand = 0xC0184102, // dispatched to AllocateSpace()
+        IocMapBufferExCommand = 0xC0284106, // dispatched to MapBufferEx()
+        IocBindChannelCommand = 0x40044101, // dispatched to BindChannel()
+        IocGetVaRegionsCommand = 0xC0404108, // dispatched to GetVARegions()
+    };
+
+    struct IoctlInitalizeEx { // Parameter block copied in and out by InitalizeEx().
+        u32_le big_page_size; // depends on GPU's available_big_page_sizes; 0=default
+        s32_le as_fd;         // ignored; passes 0
+        u32_le flags;         // passes 0
+        u32_le reserved;      // ignored; passes 0
+        u64_le unk0; // purpose unknown
+        u64_le unk1; // purpose unknown
+        u64_le unk2; // purpose unknown
+    };
+    static_assert(sizeof(IoctlInitalizeEx) == 40, "IoctlInitalizeEx is incorrect size");
+
+    /// Parameter block copied in and out by AllocateSpace().
+    struct IoctlAllocSpace {
+        u32_le pages;
+        u32_le page_size;
+        u32_le flags;
+        INSERT_PADDING_WORDS(1);
+        // Both names share the same 8 bytes; AllocateSpace() writes the result through `offset`.
+        union {
+            u64_le offset;
+            u64_le align;
+        };
+    };
+    // The message previously named IoctlInitalizeEx, which would misreport a size mismatch here.
+    static_assert(sizeof(IoctlAllocSpace) == 24, "IoctlAllocSpace is incorrect size");
+
+    struct IoctlMapBufferEx { // Parameter block copied in and out by MapBufferEx().
+        u32_le flags; // bit0: fixed_offset, bit2: cacheable
+        u32_le kind;  // -1 is default
+        u32_le nvmap_handle;
+        u32_le page_size; // 0 means don't care
+        u64_le buffer_offset;
+        u64_le mapping_size;
+        u64_le offset; // overwritten with 0 by the current MapBufferEx() stub
+    };
+    static_assert(sizeof(IoctlMapBufferEx) == 40, "IoctlMapBufferEx is incorrect size");
+
+    struct IoctlBindChannel { // Parameter block copied in and out by BindChannel().
+        u32_le fd; // stored into `channel` by BindChannel()
+    };
+    static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");
+
+    struct IoctlVaRegion { // One entry of IoctlGetVaRegions::regions.
+        u64_le offset;
+        u32_le page_size;
+        INSERT_PADDING_WORDS(1); // keeps `pages` 8-byte aligned
+        u64_le pages;
+    };
+    static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
+
+    struct IoctlGetVaRegions { // Parameter block copied in and out by GetVARegions().
+        u64_le buf_addr; // (contained output user ptr on linux, ignored)
+        u32_le buf_size; // forced to 2*sizeof(struct va_region)
+        u32_le reserved;
+        IoctlVaRegion regions[2]; // filled with fixed values by GetVARegions()
+    };
+    static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2,
+                  "IoctlGetVaRegions is incorrect size");
+
+    u32 channel{}; // fd most recently passed to BindChannel(); 0 until then
+
+    u32 InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output); // IocInitalizeExCommand
+    u32 AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output); // IocAllocateSpaceCommand
+    u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); // IocMapBufferExCommand
+    u32 BindChannel(const std::vector<u8>& input, std::vector<u8>& output); // IocBindChannelCommand
+    u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); // IocGetVaRegionsCommand
 };

 } // namespace Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@ -10,12 +10,12 @@ namespace Service {
 namespace Nvidia {
 namespace Devices {

-u32 nvhost_ctrl::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%lx, output_size=0x%lx", command,
              input.size(), output.size());

-    switch (command) {
-    case IocGetConfigCommand:
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocGetConfigCommand:
        return NvOsGetConfigU32(input, output);
    }
    UNIMPLEMENTED();
@ -23,19 +23,23 @@ u32 nvhost_ctrl::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8
 }

 u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output) {
-    IocGetConfigParams params;
+    IocGetConfigParams params{};
    std::memcpy(&params, input.data(), sizeof(params));
    LOG_DEBUG(Service_NVDRV, "called, setting=%s!%s", params.domain_str.data(),
              params.param_str.data());

    if (!strcmp(params.domain_str.data(), "nv")) {
        if (!strcmp(params.param_str.data(), "NV_MEMORY_PROFILER")) {
-            params.config_str[0] = '1';
+            params.config_str[0] = '0';
+        } else if (!strcmp(params.param_str.data(), "NVN_THROUGH_OPENGL")) {
+            params.config_str[0] = '0';
+        } else if (!strcmp(params.param_str.data(), "NVRM_GPU_PREVENT_USE")) {
+            params.config_str[0] = '0';
        } else {
-            UNIMPLEMENTED();
+            params.config_str[0] = '0';
        }
    } else {
-        UNIMPLEMENTED();
+        UNIMPLEMENTED(); // unknown domain? Only nv has been seen so far on hardware
    }
    std::memcpy(output.data(), &params, sizeof(params));
    return 0;
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@ -20,10 +20,10 @@ public:
    nvhost_ctrl() = default;
    ~nvhost_ctrl() override = default;

-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

 private:
-    enum IoctlCommands {
+    enum class IoctlCommand : u32_le {
        IocSyncptReadCommand = 0xC0080014,
        IocSyncptIncrCommand = 0x40040015,
        IocSyncptWaitCommand = 0xC00C0016,
@ -39,6 +39,7 @@ private:
        std::array<char, 0x41> param_str;
        std::array<char, 0x101> config_str;
    };
+    static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");

    u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
 };
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@ -0,0 +1,114 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+
+/// Routes an ioctl to the matching handler by comparing the raw command word against
+/// IoctlCommand. Commands with no handler hit UNIMPLEMENTED() and report 0.
+u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    // Pass the raw command word rather than the Ioctl union through the printf-style varargs,
+    // and print the size_t buffer sizes with %zx.
+    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%zx, output_size=0x%zx",
+              static_cast<u32>(command.raw), input.size(), output.size());
+
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocGetCharacteristicsCommand:
+        return GetCharacteristics(input, output);
+    case IoctlCommand::IocGetTPCMasksCommand:
+        return GetTPCMasks(input, output);
+    case IoctlCommand::IocGetActiveSlotMaskCommand:
+        return GetActiveSlotMask(input, output);
+    case IoctlCommand::IocZcullGetCtxSizeCommand:
+        return ZCullGetCtxSize(input, output);
+    case IoctlCommand::IocZcullGetInfo:
+        return ZCullGetInfo(input, output);
+    }
+    UNIMPLEMENTED();
+    return 0;
+}
+
+/// Fills the GPU characteristics block with fixed values and writes it to output.
+/// Always returns 0.
+u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlCharacteristics params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    params.gc.arch = 0x120;
+    params.gc.impl = 0xb;
+    params.gc.rev = 0xa1;
+    params.gc.num_gpc = 0x1;
+    params.gc.l2_cache_size = 0x40000;
+    params.gc.on_board_video_memory_size = 0x0;
+    params.gc.num_tpc_per_gpc = 0x2;
+    params.gc.bus_type = 0x20;
+    params.gc.big_page_size = 0x20000;
+    params.gc.compression_page_size = 0x20000;
+    params.gc.pde_coverage_bit_count = 0x1B;
+    params.gc.available_big_page_sizes = 0x30000;
+    params.gc.gpc_mask = 0x1;
+    params.gc.sm_arch_sm_version = 0x503;
+    params.gc.sm_arch_spa_version = 0x503;
+    params.gc.sm_arch_warp_count = 0x80;
+    params.gc.gpu_va_bit_count = 0x28;
+    params.gc.reserved = 0x0;
+    params.gc.flags = 0x55;
+    params.gc.twod_class = 0x902D;
+    params.gc.threed_class = 0xB197;
+    params.gc.compute_class = 0xB1C0;
+    params.gc.gpfifo_class = 0xB06F;
+    params.gc.inline_to_memory_class = 0xA140;
+    params.gc.dma_copy_class = 0xB0B5;
+    params.gc.max_fbps_count = 0x1;
+    params.gc.fbp_en_mask = 0x0;
+    params.gc.max_ltc_per_fbp = 0x2;
+    params.gc.max_lts_per_ltc = 0x1;
+    params.gc.max_tex_per_tpc = 0x0;
+    params.gc.max_gpc_count = 0x1;
+    params.gc.rop_l2_en_mask_0 = 0x21D70;
+    params.gc.rop_l2_en_mask_1 = 0x0;
+    params.gc.chipname = 0x6230326D67;
+    params.gc.gr_compbit_store_base_hw = 0x0;
+    params.gpu_characteristics_buf_size = 0xA0;
+    params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED)
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Logs the request and echoes the parameters back unchanged. Always returns 0.
+u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlGpuGetTpcMasksArgs params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in, and never write past the end of output.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    // The first value printed is mask_buf_size, so label it as such.
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, mask_buf_size=0x%x, mask_buf_addr=0x%lx",
+                params.mask_buf_size, params.mask_buf_addr);
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// Reports a fixed slot (0x07) and mask (0x01). Always returns 0.
+u32 nvhost_ctrl_gpu::GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlActiveSlotMask params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    params.slot = 0x07;
+    params.mask = 0x01;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// Reports a fixed zcull context size of 1. Always returns 0.
+u32 nvhost_ctrl_gpu::ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlZcullGetCtxSize params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    params.size = 0x1;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Clears every byte of the output buffer; the input is not read. Always returns 0.
+u32 nvhost_ctrl_gpu::ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    for (u8& byte : output) {
+        byte = 0;
+    }
+    return 0;
+}
+
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@ -0,0 +1,130 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/service/nvdrv/devices/nvdevice.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+
+class nvhost_ctrl_gpu final : public nvdevice { // nvdevice whose ioctls report fixed GPU info
+public:
+    nvhost_ctrl_gpu() = default;
+    ~nvhost_ctrl_gpu() override = default;
+
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
+
+private:
+    enum class IoctlCommand : u32_le { // Raw command words that ioctl() switches on.
+        IocGetCharacteristicsCommand = 0xC0B04705, // dispatched to GetCharacteristics()
+        IocGetTPCMasksCommand = 0xC0184706, // dispatched to GetTPCMasks()
+        IocGetActiveSlotMaskCommand = 0x80084714, // dispatched to GetActiveSlotMask()
+        IocZcullGetCtxSizeCommand = 0x80044701, // dispatched to ZCullGetCtxSize()
+        IocZcullGetInfo = 0x80284702, // dispatched to ZCullGetInfo()
+    };
+
+    struct IoctlGpuCharacteristics {
+        u32_le arch;                       // 0x120 (NVGPU_GPU_ARCH_GM200)
+        u32_le impl;                       // 0xB (NVGPU_GPU_IMPL_GM20B)
+        u32_le rev;                        // 0xA1 (Revision A1)
+        u32_le num_gpc;                    // 0x1
+        u64_le l2_cache_size;              // 0x40000
+        u64_le on_board_video_memory_size; // 0x0 (not used)
+        u32_le num_tpc_per_gpc;            // 0x2
+        u32_le bus_type;                   // 0x20 (NVGPU_GPU_BUS_TYPE_AXI)
+        u32_le big_page_size;              // 0x20000
+        u32_le compression_page_size;      // 0x20000
+        u32_le pde_coverage_bit_count;     // 0x1B
+        u32_le available_big_page_sizes;   // 0x30000
+        u32_le gpc_mask;                   // 0x1
+        u32_le sm_arch_sm_version;         // 0x503 (Maxwell Generation 5.0.3?)
+        u32_le sm_arch_spa_version;        // 0x503 (Maxwell Generation 5.0.3?)
+        u32_le sm_arch_warp_count;         // 0x80
+        u32_le gpu_va_bit_count;           // 0x28
+        u32_le reserved;                   // NULL
+        u64_le flags;                      // 0x55
+        u32_le twod_class;                 // 0x902D (FERMI_TWOD_A)
+        u32_le threed_class;               // 0xB197 (MAXWELL_B)
+        u32_le compute_class;              // 0xB1C0 (MAXWELL_COMPUTE_B)
+        u32_le gpfifo_class;               // 0xB06F (MAXWELL_CHANNEL_GPFIFO_A)
+        u32_le inline_to_memory_class;     // 0xA140 (KEPLER_INLINE_TO_MEMORY_B)
+        u32_le dma_copy_class;             // 0xB0B5 (MAXWELL_DMA_COPY_A)
+        u32_le max_fbps_count;             // 0x1
+        u32_le fbp_en_mask;                // 0x0 (disabled)
+        u32_le max_ltc_per_fbp;            // 0x2
+        u32_le max_lts_per_ltc;            // 0x1
+        u32_le max_tex_per_tpc;            // 0x0 (not supported)
+        u32_le max_gpc_count;              // 0x1
+        u32_le rop_l2_en_mask_0;           // 0x21D70 (fuse_status_opt_rop_l2_fbp_r)
+        u32_le rop_l2_en_mask_1;           // 0x0
+        u64_le chipname;                   // 0x6230326D67 ("gm20b")
+        u64_le gr_compbit_store_base_hw;   // 0x0 (not supported)
+    };
+    static_assert(sizeof(IoctlGpuCharacteristics) == 160,
+                  "IoctlGpuCharacteristics is incorrect size");
+
+    struct IoctlCharacteristics {
+        u64_le gpu_characteristics_buf_size; // must not be NULL, but gets overwritten with
+                                             // 0xA0=max_size
+        u64_le gpu_characteristics_buf_addr; // ignored, but must not be NULL
+        IoctlGpuCharacteristics gc; // filled with fixed values by GetCharacteristics()
+    };
+    static_assert(sizeof(IoctlCharacteristics) == 16 + sizeof(IoctlGpuCharacteristics),
+                  "IoctlCharacteristics is incorrect size");
+
+    struct IoctlGpuGetTpcMasksArgs {
+        /// [in]  TPC mask buffer size reserved by userspace. Should be at least
+        /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
+        /// [out] full kernel buffer size
+        u32_le mask_buf_size;
+        u32_le reserved;
+
+        /// [in]  pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
+        /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
+        u64_le mask_buf_addr;
+        u64_le unk; // Nintendo add this?
+    };
+    static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
+                  "IoctlGpuGetTpcMasksArgs is incorrect size");
+
+    struct IoctlActiveSlotMask {
+        u32_le slot; // always 0x07
+        u32_le mask; // set to 0x01 by GetActiveSlotMask()
+    };
+    static_assert(sizeof(IoctlActiveSlotMask) == 8, "IoctlActiveSlotMask is incorrect size");
+
+    struct IoctlZcullGetCtxSize {
+        u32_le size; // set to 0x1 by ZCullGetCtxSize()
+    };
+    static_assert(sizeof(IoctlZcullGetCtxSize) == 4, "IoctlZcullGetCtxSize is incorrect size");
+
+    struct IoctlNvgpuGpuZcullGetInfoArgs { // layout for IocZcullGetInfo; the stub only zeroes output
+        u32_le width_align_pixels;
+        u32_le height_align_pixels;
+        u32_le pixel_squares_by_aliquots;
+        u32_le aliquot_total;
+        u32_le region_byte_multiplier;
+        u32_le region_header_size;
+        u32_le subregion_header_size;
+        u32_le subregion_width_align_pixels;
+        u32_le subregion_height_align_pixels;
+        u32_le subregion_count;
+    };
+    static_assert(sizeof(IoctlNvgpuGpuZcullGetInfoArgs) == 40,
+                  "IoctlNvgpuGpuZcullGetInfoArgs is incorrect size");
+
+    u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
+};
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@ -0,0 +1,144 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+
+/// Routes an ioctl to the matching handler. Fixed-size commands are matched on the raw command
+/// word; SUBMIT_GPFIFO is matched on group and cmd only. Anything else hits UNIMPLEMENTED() and
+/// reports 0.
+u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    // Pass the raw command word rather than the Ioctl union through the printf-style varargs,
+    // and print the size_t buffer sizes with %zx.
+    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%zx, output_size=0x%zx",
+              static_cast<u32>(command.raw), input.size(), output.size());
+
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocSetNVMAPfdCommand:
+        return SetNVMAPfd(input, output);
+    case IoctlCommand::IocSetClientDataCommand:
+        return SetClientData(input, output);
+    case IoctlCommand::IocGetClientDataCommand:
+        return GetClientData(input, output);
+    case IoctlCommand::IocZCullBind:
+        return ZCullBind(input, output);
+    case IoctlCommand::IocSetErrorNotifierCommand:
+        return SetErrorNotifier(input, output);
+    case IoctlCommand::IocChannelSetPriorityCommand:
+        return SetChannelPriority(input, output);
+    case IoctlCommand::IocAllocGPFIFOEx2Command:
+        return AllocGPFIFOEx2(input, output);
+    case IoctlCommand::IocAllocObjCtxCommand:
+        return AllocateObjectContext(input, output);
+    }
+
+    if (command.group == NVGPU_IOCTL_MAGIC) {
+        if (command.cmd == NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO) {
+            return SubmitGPFIFO(input, output);
+        }
+    }
+
+    UNIMPLEMENTED();
+    return 0;
+}
+
+/// Stores the nvmap fd passed by the caller in `nvmap_fd` and echoes the parameters back.
+/// Always returns 0.
+u32 nvhost_gpu::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlSetNvmapFD params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_DEBUG(Service_NVDRV, "called, fd=%x", params.nvmap_fd);
+    nvmap_fd = params.nvmap_fd;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// Stores the caller's data word in `user_data` and echoes the parameters back.
+/// Always returns 0.
+u32 nvhost_gpu::SetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlClientData params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    user_data = params.data;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// Returns the value currently held in `user_data` through the data field of the parameters.
+/// Always returns 0.
+u32 nvhost_gpu::GetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlClientData params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    params.data = user_data;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// Stores the caller's zcull bind parameters in `zcull_params` and echoes them back.
+/// Always returns 0.
+u32 nvhost_gpu::ZCullBind(const std::vector<u8>& input, std::vector<u8>& output) {
+    // The buffer sizes are chosen by the caller, so never copy more than sizeof(zcull_params)
+    // in either direction; an oversized input would otherwise overwrite neighbouring members.
+    const std::size_t in_size =
+        input.size() < sizeof(zcull_params) ? input.size() : sizeof(zcull_params);
+    const std::size_t out_size =
+        output.size() < sizeof(zcull_params) ? output.size() : sizeof(zcull_params);
+    std::memcpy(&zcull_params, input.data(), in_size);
+    LOG_DEBUG(Service_NVDRV, "called, gpu_va=%lx, mode=%x", zcull_params.gpu_va, zcull_params.mode);
+    std::memcpy(output.data(), &zcull_params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Logs the request and echoes the parameters back unchanged. Always returns 0.
+u32 nvhost_gpu::SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlSetErrorNotifier params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset=%lx, size=%lx, mem=%x", params.offset,
+                params.size, params.mem);
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Stores the caller's priority value in `channel_priority` and echoes it back.
+/// Always returns 0.
+u32 nvhost_gpu::SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output) {
+    // The buffer sizes are chosen by the caller, so never copy more than
+    // sizeof(channel_priority) in either direction; an oversized input would otherwise
+    // overwrite neighbouring members.
+    const std::size_t in_size =
+        input.size() < sizeof(channel_priority) ? input.size() : sizeof(channel_priority);
+    const std::size_t out_size =
+        output.size() < sizeof(channel_priority) ? output.size() : sizeof(channel_priority);
+    std::memcpy(&channel_priority, input.data(), in_size);
+    LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority=%x", channel_priority);
+    std::memcpy(output.data(), &channel_priority, out_size);
+    return 0;
+}
+
+/// (STUBBED) Logs the request and returns the parameters with fence_out zeroed.
+/// Always returns 0.
+u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlAllocGpfifoEx2 params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_WARNING(Service_NVDRV,
+                "(STUBBED) called, num_entries=%x, flags=%x, unk0=%x, unk1=%x, unk2=%x, unk3=%x",
+                params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
+                params.unk3);
+    params.fence_out.id = 0;
+    params.fence_out.value = 0;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Logs the request and returns the parameters with obj_id set to 0.
+/// Always returns 0.
+u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlAllocObjCtx params{};
+    // The buffer sizes are chosen by the caller and need not match the struct, so never copy
+    // more than sizeof(params) in either direction.
+    const std::size_t in_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(&params, input.data(), in_size);
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num=%x, flags=%x", params.class_num,
+                params.flags);
+    params.obj_id = 0x0;
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+/// (STUBBED) Reads an IoctlSubmitGpfifo header followed by its IoctlGpfifoEntry array from
+/// input, then returns the header with fence_out zeroed. The entries are not processed yet.
+/// Always returns 0.
+u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
+    if (input.size() < sizeof(IoctlSubmitGpfifo))
+        UNIMPLEMENTED();
+    IoctlSubmitGpfifo params{};
+    // In case UNIMPLEMENTED() returns, still bound the header copy by what input really holds.
+    const std::size_t header_size =
+        input.size() < sizeof(params) ? input.size() : sizeof(params);
+    std::memcpy(&params, input.data(), header_size);
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo=%lx, num_entries=%x, flags=%x",
+                params.gpfifo, params.num_entries, params.flags);
+
+    // num_entries comes from the caller's buffer; only read as many entries as actually follow
+    // the header so the copy cannot run past the end of input.
+    const std::size_t available_entries = (input.size() - header_size) / sizeof(IoctlGpfifoEntry);
+    const std::size_t requested_entries = static_cast<std::size_t>(params.num_entries);
+    const std::size_t entry_count =
+        requested_entries < available_entries ? requested_entries : available_entries;
+
+    std::vector<IoctlGpfifoEntry> entries(entry_count);
+    if (entry_count != 0) {
+        std::memcpy(entries.data(), input.data() + header_size,
+                    entry_count * sizeof(IoctlGpfifoEntry));
+    }
+    for (auto& entry : entries) {
+        VAddr va_addr = entry.Address();
+        // TODO(ogniK): Process these
+    }
+    params.fence_out.id = 0;
+    params.fence_out.value = 0;
+    const std::size_t out_size = output.size() < sizeof(params) ? output.size() : sizeof(params);
+    std::memcpy(output.data(), &params, out_size);
+    return 0;
+}
+
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@ -0,0 +1,139 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/service/nvdrv/devices/nvdevice.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+// 'H' is 0x48, the value found in bits 8-15 of most IoctlCommand words below; presumably the
+// ioctl group ("magic") byte -- TODO confirm.
+constexpr u32 NVGPU_IOCTL_MAGIC = 'H';
+// Presumably the command number of the submit-GPFIFO ioctl within that group -- TODO confirm.
+constexpr u32 NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO = 0x8;
+
+/// nvdevice implementation that Module registers under the path "/dev/nvhost-gpu".
+class nvhost_gpu final : public nvdevice {
+public:
+    nvhost_gpu() = default;
+    ~nvhost_gpu() override = default;
+
+    /// Entry point for ioctls issued on this device. `input` holds the raw request bytes and
+    /// `output` is the pre-sized buffer the response is written into.
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
+
+private:
+    /// Raw ioctl command words known to this device.
+    enum class IoctlCommand : u32_le {
+        IocSetNVMAPfdCommand = 0x40044801,
+        IocSetClientDataCommand = 0x40084714,
+        IocGetClientDataCommand = 0x80084715,
+        IocZCullBind = 0xc010480b,
+        IocSetErrorNotifierCommand = 0xC018480C,
+        IocChannelSetPriorityCommand = 0x4004480D,
+        IocAllocGPFIFOEx2Command = 0xC020481A,
+        IocAllocObjCtxCommand = 0xC0104809,
+    };
+
+    /// Named forms of the class numbers listed on IoctlAllocObjCtx::class_num.
+    enum class CtxObjects : u32_le {
+        Ctx2D = 0x902D,
+        Ctx3D = 0xB197,
+        CtxCompute = 0xB1C0,
+        CtxKepler = 0xA140,
+        CtxDMA = 0xB0B5,
+        CtxChannelGPFIFO = 0xB06F,
+    };
+
+    // The structs below are the fixed-layout parameter blocks exchanged through the ioctl
+    // buffers; handlers memcpy them to and from the raw bytes, so field order and padding must
+    // not change. Each static_assert pins the expected size.
+
+    /// Presumably the parameter block of IocSetNVMAPfdCommand (handler not shown in this header).
+    struct IoctlSetNvmapFD {
+        u32_le nvmap_fd;
+    };
+    static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
+
+    /// Presumably shared by the set/get client data ioctls -- TODO confirm against the handlers.
+    struct IoctlClientData {
+        u64_le data;
+    };
+    static_assert(sizeof(IoctlClientData) == 8, "IoctlClientData is incorrect size");
+
+    /// ZCull bind parameters; the trailing padding word rounds the struct up to 16 bytes.
+    struct IoctlZCullBind {
+        u64_le gpu_va;
+        u32_le mode; // 0=global, 1=no_ctxsw, 2=separate_buffer, 3=part_of_regular_buf
+        INSERT_PADDING_WORDS(1);
+    };
+    static_assert(sizeof(IoctlZCullBind) == 16, "IoctlZCullBind is incorrect size");
+
+    /// Error notifier parameters; the trailing padding word rounds the struct up to 24 bytes.
+    struct IoctlSetErrorNotifier {
+        u64_le offset;
+        u64_le size;
+        u32_le mem; // nvmap object handle
+        INSERT_PADDING_WORDS(1);
+    };
+    static_assert(sizeof(IoctlSetErrorNotifier) == 24, "IoctlSetErrorNotifier is incorrect size");
+
+    /// Fence (id, value) pair embedded as the output field of the GPFIFO alloc/submit blocks.
+    struct IoctlFence {
+        u32_le id;
+        u32_le value;
+    };
+    static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size");
+
+    /// Parameter block read and written back by AllocGPFIFOEx2.
+    struct IoctlAllocGpfifoEx2 {
+        u32_le num_entries;   // in
+        u32_le flags;         // in
+        u32_le unk0;          // in (1 works)
+        IoctlFence fence_out; // out
+        u32_le unk1;          // in
+        u32_le unk2;          // in
+        u32_le unk3;          // in
+    };
+    static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size");
+
+    /// Parameter block read and written back by AllocateObjectContext.
+    struct IoctlAllocObjCtx {
+        u32_le class_num; // 0x902D=2d, 0xB197=3d, 0xB1C0=compute, 0xA140=kepler, 0xB0B5=DMA,
+                          // 0xB06F=channel_gpfifo
+        u32_le flags;
+        u64_le obj_id; // (ignored) used for FREE_OBJ_CTX ioctl, which is not supported
+    };
+    static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
+
+    /// One 8-byte GPFIFO entry. The second word is viewed both as a raw value (entry1) and
+    /// through the bit fields that share its storage in the anonymous union.
+    struct IoctlGpfifoEntry {
+        u32_le entry0; // gpu_va_lo
+        union {
+            u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+            BitField<0, 8, u32_le> gpu_va_hi;
+            BitField<8, 2, u32_le> unk1;
+            BitField<10, 21, u32_le> sz;
+            BitField<31, 1, u32_le> unk2;
+        };
+
+        /// Returns the address formed from entry0 (bits 0-31) and the 8-bit gpu_va_hi field
+        /// (bits 32-39).
+        VAddr Address() const {
+            return (static_cast<VAddr>(gpu_va_hi) << 32) | entry0;
+        }
+    };
+    static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
+
+    /// Header of a submit request; SubmitGPFIFO reads num_entries IoctlGpfifoEntry records from
+    /// the input buffer directly after this header.
+    struct IoctlSubmitGpfifo {
+        u64_le gpfifo;      // (ignored) pointer to gpfifo fence structs
+        u32_le num_entries; // number of IoctlGpfifoEntry records that follow this header
+        u32_le flags;
+        IoctlFence fence_out; // returned new fence object for others to wait on
+    };
+    static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence),
+                  "submit_gpfifo is incorrect size");
+
+    // Per-device state, zero-initialised. Presumably filled in by the SetNVMAPfd, SetClientData,
+    // ZCullBind and SetChannelPriority handlers (their bodies are not part of this header).
+    u32_le nvmap_fd{};
+    u64_le user_data{};
+    IoctlZCullBind zcull_params{};
+    u32_le channel_priority{};
+
+    // One handler per ioctl; all share the (input, output) buffer signature and return a u32
+    // result.
+    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetClientData(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 ZCullBind(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
+};
+
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@ -21,8 +21,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const {
    return object->addr;
 }

-u32 nvmap::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
-    switch (static_cast<IoctlCommand>(command)) {
+u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    switch (static_cast<IoctlCommand>(command.raw)) {
    case IoctlCommand::Create:
        return IocCreate(input, output);
    case IoctlCommand::Alloc:
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@ -24,7 +24,7 @@ public:
    /// Returns the allocated address of an nvmap object given its handle.
    VAddr GetObjectAddress(u32 handle) const;

-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

 private:
    // Represents an nvmap object.
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@ -4,6 +4,7 @@

 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/event.h"
 #include "core/hle/service/nvdrv/interface.h"
 #include "core/hle/service/nvdrv/nvdrv.h"

@ -11,7 +12,7 @@ namespace Service {
 namespace Nvidia {

 void NVDRV::Open(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    LOG_DEBUG(Service_NVDRV, "called");

    auto buffer = ctx.BufferDescriptorA()[0];

@ -25,31 +26,35 @@ void NVDRV::Open(Kernel::HLERequestContext& ctx) {
 }

 void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    LOG_DEBUG(Service_NVDRV, "called");

    IPC::RequestParser rp{ctx};
    u32 fd = rp.Pop<u32>();
    u32 command = rp.Pop<u32>();

-    auto input_buffer = ctx.BufferDescriptorA()[0];
-    auto output_buffer = ctx.BufferDescriptorB()[0];
-
-    std::vector<u8> input(input_buffer.Size());
-    std::vector<u8> output(output_buffer.Size());
-
-    Memory::ReadBlock(input_buffer.Address(), input.data(), input_buffer.Size());
-
-    u32 nv_result = nvdrv->Ioctl(fd, command, input, output);
-
-    Memory::WriteBlock(output_buffer.Address(), output.data(), output_buffer.Size());
-
    IPC::ResponseBuilder rb{ctx, 3};
    rb.Push(RESULT_SUCCESS);
-    rb.Push(nv_result);
+    // The payload travels either in A/B buffer descriptors or, when the request carries no
+    // non-empty A descriptor, in X/C descriptors. Test for emptiness before indexing: reading
+    // element [0] of an empty descriptor list is out of bounds.
+    const auto& a_descriptors = ctx.BufferDescriptorA();
+    if (!a_descriptors.empty() && a_descriptors[0].Size() != 0) {
+        auto input_buffer = a_descriptors[0];
+        auto output_buffer = ctx.BufferDescriptorB()[0];
+        std::vector<u8> input(input_buffer.Size());
+        std::vector<u8> output(output_buffer.Size());
+        Memory::ReadBlock(input_buffer.Address(), input.data(), input_buffer.Size());
+        // The device's result code is the second word of the response.
+        rb.Push(nvdrv->Ioctl(fd, command, input, output));
+        Memory::WriteBlock(output_buffer.Address(), output.data(), output_buffer.Size());
+    } else {
+        auto input_buffer = ctx.BufferDescriptorX()[0];
+        auto output_buffer = ctx.BufferDescriptorC()[0];
+        std::vector<u8> input(input_buffer.size);
+        std::vector<u8> output(output_buffer.size);
+        Memory::ReadBlock(input_buffer.Address(), input.data(), input_buffer.size);
+        rb.Push(nvdrv->Ioctl(fd, command, input, output));
+        Memory::WriteBlock(output_buffer.Address(), output.data(), output_buffer.size);
+    }
 }

 void NVDRV::Close(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    LOG_DEBUG(Service_NVDRV, "called");

    IPC::RequestParser rp{ctx};
    u32 fd = rp.Pop<u32>();
@ -67,16 +72,35 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
    rb.Push<u32>(0);
 }

+/// Stubbed QueryEvent handler: logs the requested fd/event id and replies with a newly created
+/// event that has already been signalled.
+void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser parser{ctx};
+    const u32 fd = parser.Pop<u32>();
+    const u32 event_id = parser.Pop<u32>();
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd=%x, event_id=%x", fd, event_id);
+
+    IPC::ResponseBuilder response{ctx, 2, 1};
+    response.Push(RESULT_SUCCESS);
+
+    // The event is not tied to fd or event_id; it is created per call and signalled at once.
+    auto nv_event = Kernel::Event::Create(Kernel::ResetType::Pulse, "NVEvent");
+    nv_event->Signal();
+    response.PushCopyObjects(nv_event);
+}
+
+/// Stubbed SetClientPID handler: stores the 64-bit value from the request in `pid`, logs it and
+/// replies with a success result followed by a zero word.
 void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};
    pid = rp.Pop<u64>();

-    LOG_INFO(Service_NVDRV, "called, pid=0x%lx", pid);
+    // NOTE(review): %lx assumes `long` is 64 bits wide -- confirm for targets where it is not.
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, pid=0x%lx", pid);
    IPC::ResponseBuilder rb{ctx, 3};
    rb.Push(RESULT_SUCCESS);
    rb.Push<u32>(0);
 }

+/// Stubbed FinishInitialize handler: performs no work and only reports success.
+void NVDRV::FinishInitialize(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+
+    // The reply carries nothing beyond the result code.
+    IPC::ResponseBuilder response{ctx, 2};
+    response.Push(RESULT_SUCCESS);
+}
+
 NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
    : ServiceFramework(name), nvdrv(std::move(nvdrv)) {
    static const FunctionInfo functions[] = {
@ -84,7 +108,9 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
        {1, &NVDRV::Ioctl, "Ioctl"},
        {2, &NVDRV::Close, "Close"},
        {3, &NVDRV::Initialize, "Initialize"},
+        {4, &NVDRV::QueryEvent, "QueryEvent"},
        {8, &NVDRV::SetClientPID, "SetClientPID"},
+        {13, &NVDRV::FinishInitialize, "FinishInitialize"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@ -22,7 +22,9 @@ private:
    void Ioctl(Kernel::HLERequestContext& ctx);
    void Close(Kernel::HLERequestContext& ctx);
    void Initialize(Kernel::HLERequestContext& ctx);
+    void QueryEvent(Kernel::HLERequestContext& ctx);
    void SetClientPID(Kernel::HLERequestContext& ctx);
+    void FinishInitialize(Kernel::HLERequestContext& ctx);

    std::shared_ptr<Module> nvdrv;

--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@ -7,6 +7,8 @@
 #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
+#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
 #include "core/hle/service/nvdrv/interface.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
@ -21,6 +23,8 @@ void InstallInterfaces(SM::ServiceManager& service_manager) {
    auto module_ = std::make_shared<Module>();
    std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager);
    std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager);
+    std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager);
+    std::make_shared<NVDRV>(module_, "nvdrv:t")->InstallAsService(service_manager);
    std::make_shared<NVMEMP>()->InstallAsService(service_manager);
    nvdrv = module_;
 }
@ -28,9 +32,11 @@ void InstallInterfaces(SM::ServiceManager& service_manager) {
 Module::Module() {
+    // Populates the path -> device table. nvmap is created first because the same instance is
+    // both registered under "/dev/nvmap" and handed to nvdisp_disp0.
    auto nvmap_dev = std::make_shared<Devices::nvmap>();
    devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>();
+    devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>();
    devices["/dev/nvmap"] = nvmap_dev;
    devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev);
    devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>();
+    devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>();
 }

 u32 Module::Open(std::string device_name) {
@ -45,12 +51,12 @@ u32 Module::Open(std::string device_name) {
    return fd;
 }

-u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
+/// Forwards an ioctl to the device that was opened as `fd` and returns the device's result.
+/// Asserts if `fd` does not refer to an open device.
+u32 Module::Ioctl(u32 fd, u32_le command, const std::vector<u8>& input, std::vector<u8>& output) {
    auto itr = open_files.find(fd);
    ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device");

    auto device = itr->second;
-    return device->ioctl(command, input, output);
+    // Devices take the command as an Ioctl value; brace-initialise it from the raw word.
+    return device->ioctl({command}, input, output);
 }

 ResultCode Module::Close(u32 fd) {