SMMU: Add Android compatibility
This commit is contained in:
parent
0adc09e0af
commit
303cd31162
9 changed files with 42 additions and 50 deletions
|
@ -217,9 +217,6 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
|
||||||
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
|
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
|
||||||
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
|
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
|
||||||
cached_pages = std::make_unique<CachedPages>();
|
cached_pages = std::make_unique<CachedPages>();
|
||||||
for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
|
|
||||||
compressed_device_addr[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
|
@ -517,7 +514,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
|
||||||
u64 cache_begin = 0;
|
u64 cache_begin = 0;
|
||||||
u64 uncache_bytes = 0;
|
u64 uncache_bytes = 0;
|
||||||
u64 cache_bytes = 0;
|
u64 cache_bytes = 0;
|
||||||
const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
|
const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
|
||||||
|
|
||||||
std::atomic_thread_fence(std::memory_order_acquire);
|
std::atomic_thread_fence(std::memory_order_acquire);
|
||||||
const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
|
const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
|
#include "core/hle/kernel/k_process.h"
|
||||||
#include "core/hle/service/nvdrv/core/container.h"
|
#include "core/hle/service/nvdrv/core/container.h"
|
||||||
#include "core/hle/service/nvdrv/core/nvmap.h"
|
#include "core/hle/service/nvdrv/core/nvmap.h"
|
||||||
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
|
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
|
||||||
|
@ -109,7 +110,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
|
||||||
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
||||||
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
|
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
|
||||||
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
|
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
|
||||||
cmdlist.size() * sizeof(u32));
|
cmdlist.size() * sizeof(u32));
|
||||||
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
|
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
|
||||||
}
|
}
|
||||||
// Some games expect command_buffers to be written back
|
// Some games expect command_buffers to be written back
|
||||||
|
@ -135,7 +136,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) {
|
NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
|
||||||
|
DeviceFD fd) {
|
||||||
const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
|
const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
|
||||||
for (size_t i = 0; i < num_entries; i++) {
|
for (size_t i = 0; i < num_entries; i++) {
|
||||||
DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);
|
DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);
|
||||||
|
|
|
@ -44,8 +44,7 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
|
||||||
// from outside classes. This also allows modification to the internals of the memory
|
// from outside classes. This also allows modification to the internals of the memory
|
||||||
// subsystem without needing to rebuild all files that make use of the memory interface.
|
// subsystem without needing to rebuild all files that make use of the memory interface.
|
||||||
struct Memory::Impl {
|
struct Memory::Impl {
|
||||||
explicit Impl(Core::System& system_)
|
explicit Impl(Core::System& system_) : system{system_} {}
|
||||||
: system{system_} {}
|
|
||||||
|
|
||||||
void SetCurrentPageTable(Kernel::KProcess& process) {
|
void SetCurrentPageTable(Kernel::KProcess& process) {
|
||||||
current_page_table = &process.GetPageTable().GetImpl();
|
current_page_table = &process.GetPageTable().GetImpl();
|
||||||
|
@ -640,18 +639,6 @@ struct Memory::Impl {
|
||||||
LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
|
LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
|
||||||
base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
|
base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
|
||||||
|
|
||||||
// During boot, current_page_table might not be set yet, in which case we need not flush
|
|
||||||
/*if (system.IsPoweredOn()) {
|
|
||||||
auto& gpu = system.GPU();
|
|
||||||
for (u64 i = 0; i < size; i++) {
|
|
||||||
const auto page = base + i;
|
|
||||||
if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
|
|
||||||
|
|
||||||
gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
const auto end = base + size;
|
const auto end = base + size;
|
||||||
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
||||||
base + page_table.pointers.size());
|
base + page_table.pointers.size());
|
||||||
|
@ -823,8 +810,7 @@ struct Memory::Impl {
|
||||||
}
|
}
|
||||||
const size_t core = system.GetCurrentHostThreadID();
|
const size_t core = system.GetCurrentHostThreadID();
|
||||||
auto& current_area = rasterizer_read_areas[core];
|
auto& current_area = rasterizer_read_areas[core];
|
||||||
gpu_device_memory->ApplyOpOnPointer(
|
gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
|
||||||
p, scratch_buffers[core], [&](DAddr address) {
|
|
||||||
const DAddr end_address = address + size;
|
const DAddr end_address = address + size;
|
||||||
if (current_area.start_address <= address && end_address <= current_area.end_address)
|
if (current_area.start_address <= address && end_address <= current_area.end_address)
|
||||||
[[likely]] {
|
[[likely]] {
|
||||||
|
@ -852,8 +838,7 @@ struct Memory::Impl {
|
||||||
sys_core_guard.unlock();
|
sys_core_guard.unlock();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
gpu_device_memory->ApplyOpOnPointer(
|
gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
|
||||||
p, scratch_buffers[core], [&](DAddr address) {
|
|
||||||
auto& current_area = rasterizer_write_areas[core];
|
auto& current_area = rasterizer_write_areas[core];
|
||||||
PAddr subaddress = address >> YUZU_PAGEBITS;
|
PAddr subaddress = address >> YUZU_PAGEBITS;
|
||||||
bool do_collection = current_area.last_address == subaddress;
|
bool do_collection = current_area.last_address == subaddress;
|
||||||
|
@ -872,12 +857,25 @@ struct Memory::Impl {
|
||||||
PAddr last_address;
|
PAddr last_address;
|
||||||
};
|
};
|
||||||
|
|
||||||
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
|
void InvalidateGPUMemory(u8* p, size_t size) {
|
||||||
system.GPU().InvalidateRegion(GetInteger(dest_addr), size);
|
constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
|
||||||
}
|
const size_t core = std::min(system.GetCurrentHostThreadID(),
|
||||||
|
sys_core); // any other calls threads go to syscore.
|
||||||
void FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
|
if (!gpu_device_memory) [[unlikely]] {
|
||||||
system.GPU().FlushRegion(GetInteger(dest_addr), size);
|
gpu_device_memory = &system.Host1x().MemoryManager();
|
||||||
|
}
|
||||||
|
// Guard on sys_core;
|
||||||
|
if (core == sys_core) [[unlikely]] {
|
||||||
|
sys_core_guard.lock();
|
||||||
|
}
|
||||||
|
SCOPE_EXIT({
|
||||||
|
if (core == sys_core) [[unlikely]] {
|
||||||
|
sys_core_guard.unlock();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
auto& gpu = system.GPU();
|
||||||
|
gpu_device_memory->ApplyOpOnPointer(
|
||||||
|
p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); });
|
||||||
}
|
}
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
@ -1081,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
|
||||||
impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
|
impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
|
|
||||||
impl->InvalidateRegion(dest_addr, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
|
|
||||||
impl->FlushRegion(dest_addr, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
|
bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
|
||||||
[[maybe_unused]] bool mapped = true;
|
[[maybe_unused]] bool mapped = true;
|
||||||
[[maybe_unused]] bool rasterizer = false;
|
[[maybe_unused]] bool rasterizer = false;
|
||||||
|
@ -1100,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
|
||||||
GetInteger(vaddr));
|
GetInteger(vaddr));
|
||||||
mapped = false;
|
mapped = false;
|
||||||
},
|
},
|
||||||
[&] {
|
[&] { rasterizer = true; });
|
||||||
impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size);
|
if (rasterizer) {
|
||||||
rasterizer = true;
|
impl->InvalidateGPUMemory(ptr, size);
|
||||||
});
|
}
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if (!rasterizer && mapped) {
|
if (!rasterizer && mapped) {
|
||||||
|
|
|
@ -486,10 +486,10 @@ public:
|
||||||
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
|
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
|
||||||
|
|
||||||
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
|
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
|
||||||
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
|
|
||||||
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
|
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
|
||||||
|
|
||||||
bool InvalidateSeparateHeap(void* fault_address);
|
bool InvalidateSeparateHeap(void* fault_address);
|
||||||
void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
|
@ -17,7 +17,7 @@ struct MaxwellDeviceTraits {
|
||||||
static constexpr bool supports_pinning = false;
|
static constexpr bool supports_pinning = false;
|
||||||
static constexpr size_t device_virtual_bits = 34;
|
static constexpr size_t device_virtual_bits = 34;
|
||||||
using DeviceInterface = typename VideoCore::RasterizerInterface;
|
using DeviceInterface = typename VideoCore::RasterizerInterface;
|
||||||
using DeviceMethods = typename MaxwellDeviceMethods;
|
using DeviceMethods = MaxwellDeviceMethods;
|
||||||
};
|
};
|
||||||
|
|
||||||
using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
|
using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
|
||||||
|
|
|
@ -13,6 +13,8 @@ Host1x::Host1x(Core::System& system_)
|
||||||
memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
|
memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
|
||||||
allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
|
allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
|
||||||
|
|
||||||
|
Host1x::~Host1x() = default;
|
||||||
|
|
||||||
} // namespace Host1x
|
} // namespace Host1x
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
|
@ -21,6 +21,7 @@ namespace Host1x {
|
||||||
class Host1x {
|
class Host1x {
|
||||||
public:
|
public:
|
||||||
explicit Host1x(Core::System& system);
|
explicit Host1x(Core::System& system);
|
||||||
|
~Host1x();
|
||||||
|
|
||||||
SyncpointManager& GetSyncpointManager() {
|
SyncpointManager& GetSyncpointManager() {
|
||||||
return syncpoint_manager;
|
return syncpoint_manager;
|
||||||
|
|
|
@ -68,7 +68,7 @@ public:
|
||||||
if (!address) {
|
if (!address) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
return memory.GetPointer(*address);
|
return memory.GetPointer<T>(*address);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
|
@ -256,8 +256,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
|
||||||
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
|
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
|
||||||
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
|
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
|
||||||
};
|
};
|
||||||
u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr);
|
u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
|
||||||
u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8);
|
u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
|
||||||
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
|
||||||
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
|
||||||
pointer, pointer_timestamp] {
|
pointer, pointer_timestamp] {
|
||||||
|
@ -561,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
|
||||||
}
|
}
|
||||||
if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
|
if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
|
||||||
False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
|
False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
|
||||||
auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address);
|
auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address);
|
||||||
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
|
||||||
std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
|
std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in a new issue