diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index b5f9b176..2e8273ec 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -305,6 +305,7 @@ if (ARCHITECTURE STREQUAL "x86_64") target_sources(dynarmic PRIVATE backend/x64/a32_emit_x64.cpp backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp backend/x64/a32_interface.cpp backend/x64/a32_jitstate.cpp backend/x64/a32_jitstate.h @@ -315,6 +316,7 @@ if (ARCHITECTURE STREQUAL "x86_64") target_sources(dynarmic PRIVATE backend/x64/a64_emit_x64.cpp backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp backend/x64/a64_interface.cpp backend/x64/a64_jitstate.cpp backend/x64/a64_jitstate.h diff --git a/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/backend/x64/a32_emit_x64.cpp index 4f819825..d85da513 100644 --- a/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -11,7 +11,6 @@ #include #include -#include #include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/backend/x64/abi.h" @@ -26,11 +25,9 @@ #include "dynarmic/common/common_types.h" #include "dynarmic/common/scope_exit.h" #include "dynarmic/common/variant_util.h" -#include "dynarmic/common/x64_disassemble.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/interface/A32/coprocessor.h" -#include "dynarmic/interface/exclusive_monitor.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -198,67 +195,6 @@ void A32EmitX64::ClearFastDispatchTable() { } } -void A32EmitX64::GenFastmemFallbacks() { - const std::initializer_list idxes{0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; - const std::array, 4> read_callbacks{{ - {8, Devirtualize<&A32::UserCallbacks::MemoryRead8>(conf.callbacks)}, - {16, Devirtualize<&A32::UserCallbacks::MemoryRead16>(conf.callbacks)}, - {32, Devirtualize<&A32::UserCallbacks::MemoryRead32>(conf.callbacks)}, - {64, Devirtualize<&A32::UserCallbacks::MemoryRead64>(conf.callbacks)}, - }}; - const std::array, 4> write_callbacks{{ - {8, Devirtualize<&A32::UserCallbacks::MemoryWrite8>(conf.callbacks)}, - {16, Devirtualize<&A32::UserCallbacks::MemoryWrite16>(conf.callbacks)}, - {32, Devirtualize<&A32::UserCallbacks::MemoryWrite32>(conf.callbacks)}, - {64, Devirtualize<&A32::UserCallbacks::MemoryWrite64>(conf.callbacks)}, - }}; - - for (int vaddr_idx : idxes) { - for (int value_idx : idxes) { - for (const auto& [bitsize, callback] : read_callbacks) { - code.align(); - read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); - ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); - if (vaddr_idx != code.ABI_PARAM2.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - } - callback.EmitCall(code); - if (value_idx != code.ABI_RETURN.getIdx()) { - code.mov(Xbyak::Reg64{value_idx}, code.ABI_RETURN); - } - ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); - code.ret(); - PerfMapRegister(read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a32_read_fallback_{}", bitsize)); - } - - for (const auto& [bitsize, callback] : write_callbacks) { - code.align(); - write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); - ABI_PushCallerSaveRegistersAndAdjustStack(code); - if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { - 
code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); - } else if (vaddr_idx == code.ABI_PARAM3.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - if (value_idx != code.ABI_PARAM3.getIdx()) { - code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); - } - } else { - if (value_idx != code.ABI_PARAM3.getIdx()) { - code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); - } - if (vaddr_idx != code.ABI_PARAM2.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - } - } - callback.EmitCall(code); - ABI_PopCallerSaveRegistersAndAdjustStack(code); - code.ret(); - PerfMapRegister(write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a32_write_fallback_{}", bitsize)); - } - } - } -} - void A32EmitX64::GenTerminalHandlers() { // PC ends up in ebp, location_descriptor ends up in rbx const auto calculate_location_descriptor = [this] { @@ -875,372 +811,6 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], value); } -void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) { - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); -} - -std::optional A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const { - if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) { - return std::nullopt; - } - - const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst)); - if (do_not_fastmem.count(marker) > 0) { - return std::nullopt; - } - return marker; -} - -FakeCall A32EmitX64::FastmemCallback(u64 rip_) { - const auto iter = fastmem_patch_info.find(rip_); - - if (iter == fastmem_patch_info.end()) { - fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); - fmt::print("Segfault wasn't at a fastmem patch location!\n"); - fmt::print("Now dumping code.......\n\n"); - Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); - ASSERT_FALSE("iter != fastmem_patch_info.end()"); - } - - if (conf.recompile_on_fastmem_failure) { - const auto marker = iter->second.marker; - do_not_fastmem.emplace(marker); - InvalidateBasicBlocks({std::get<0>(marker)}); - } - FakeCall ret; - ret.call_rip = iter->second.callback; - ret.ret_rip = iter->second.resume_rip; - return ret; -} - -namespace { - -constexpr size_t page_bits = 12; -constexpr size_t page_size = 1 << page_bits; -constexpr size_t page_mask = (1 << page_bits) - 1; - -void EmitDetectMisaignedVAddr(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg32 vaddr, Xbyak::Reg32 tmp) { - if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) { - return; - } - - const u32 align_mask = [bitsize]() -> u32 { - switch (bitsize) { - case 16: - return 0b1; - case 32: - return 0b11; - case 64: - return 0b111; - } - UNREACHABLE(); - }(); - - code.test(vaddr, align_mask); - - if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { - code.jnz(abort, code.T_NEAR); - return; - } - - const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; - - Xbyak::Label detect_boundary, resume; - - code.jnz(detect_boundary, code.T_NEAR); - code.L(resume); - - code.SwitchToFarCode(); - code.L(detect_boundary); - code.mov(tmp, vaddr); - code.and_(tmp, page_align_mask); - code.cmp(tmp, page_align_mask); - code.jne(resume, code.T_NEAR); - // NOTE: We expect to fallthrough into abort code here. 
- code.SwitchToNearCode(); -} - -Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) { - const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32(); - - EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr.cvt32(), tmp); - - // TODO: This code assumes vaddr has been zext from 32-bits to 64-bits. - - code.mov(tmp, vaddr.cvt32()); - code.shr(tmp, static_cast(page_bits)); - code.mov(page, qword[r14 + tmp.cvt64() * sizeof(void*)]); - if (ctx.conf.page_table_pointer_mask_bits == 0) { - code.test(page, page); - } else { - code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits); - } - code.jz(abort, code.T_NEAR); - if (ctx.conf.absolute_offset_page_table) { - return page + vaddr; - } - code.mov(tmp, vaddr.cvt32()); - code.and_(tmp, static_cast(page_mask)); - return page + tmp.cvt64(); -} - -template -void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) { - switch (bitsize) { - case 8: - code.movzx(value.cvt32(), code.byte[addr]); - return; - case 16: - code.movzx(value.cvt32(), word[addr]); - return; - case 32: - code.mov(value.cvt32(), dword[addr]); - return; - case 64: - code.mov(value, qword[addr]); - return; - default: - ASSERT_FALSE("Invalid bitsize"); - } -} - -template -void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) { - switch (bitsize) { - case 8: - code.mov(code.byte[addr], value.cvt8()); - return; - case 16: - code.mov(word[addr], value.cvt16()); - return; - case 32: - code.mov(dword[addr], value.cvt32()); - return; - case 64: - code.mov(qword[addr], value); - return; - default: - ASSERT_FALSE("Invalid bitsize"); - } -} - -} // anonymous namespace - -template -void A32EmitX64::EmitMemoryRead(A32EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - ctx.reg_alloc.HostCall(inst, {}, args[0]); - Devirtualize(conf.callbacks).EmitCall(code); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(); - - const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - if (fastmem_marker) { - // Use fastmem - const auto src_ptr = r13 + vaddr; - - const auto location = code.getCurr(); - EmitReadMemoryMov(code, value, src_ptr); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - }); - - ctx.reg_alloc.DefineValue(inst, value); - return; - } - - // Use page table - ASSERT(conf.page_table); - Xbyak::Label abort, end; - - const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - EmitReadMemoryMov(code, value, src_ptr); - code.L(end); - - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - - ctx.reg_alloc.DefineValue(inst, value); -} - -template -void A32EmitX64::EmitMemoryWrite(A32EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks 
- ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); - Devirtualize(conf.callbacks).EmitCall(code); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); - - const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - if (fastmem_marker) { - // Use fastmem - const auto dest_ptr = r13 + vaddr; - - const auto location = code.getCurr(); - EmitWriteMemoryMov(code, dest_ptr, value); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - }); - - return; - } - - // Use page table - ASSERT(conf.page_table); - Xbyak::Label abort, end; - - const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - EmitWriteMemoryMov(code, dest_ptr, value); - code.L(end); - - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); -} - -void A32EmitX64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst); -} - -void A32EmitX64::EmitA32ReadMemory16(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst); -} - -void A32EmitX64::EmitA32ReadMemory32(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst); -} - -void A32EmitX64::EmitA32ReadMemory64(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst); -} - -void A32EmitX64::EmitA32WriteMemory8(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<8, &A32::UserCallbacks::MemoryWrite8>(ctx, inst); -} - -void A32EmitX64::EmitA32WriteMemory16(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<16, &A32::UserCallbacks::MemoryWrite16>(ctx, inst); -} - -void A32EmitX64::EmitA32WriteMemory32(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<32, &A32::UserCallbacks::MemoryWrite32>(ctx, inst); -} - -void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst); -} - -template -void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) { - using T = mp::unsigned_integer_of_size; - - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - ctx.reg_alloc.HostCall(inst, {}, args[0]); - - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - code.CallLambda( - [](A32::UserConfig& conf, u32 vaddr) -> T { - return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { - return (conf.callbacks->*callback)(vaddr); - }); - }); -} - -template -void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) { - using T = mp::unsigned_integer_of_size; - - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); - - Xbyak::Label end; - - code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); - code.je(end); - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - code.CallLambda( - [](A32::UserConfig& conf, u32 vaddr, T value) -> u32 { - return 
conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
-                [&](T expected) -> bool {
-                    return (conf.callbacks->*callback)(vaddr, value, expected);
-                })
-                ? 0
-                : 1;
-        });
-    code.L(end);
-}
-
-void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveReadMemory<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
-}
-
-void A32EmitX64::EmitA32ExclusiveReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveReadMemory<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
-}
-
-void A32EmitX64::EmitA32ExclusiveReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveReadMemory<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
-}
-
-void A32EmitX64::EmitA32ExclusiveReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveReadMemory<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
-}
-
-void A32EmitX64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
-}
-
-void A32EmitX64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
-}
-
-void A32EmitX64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
-}
-
-void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    ExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
-}
-
 static void EmitCoprocessorException() {
     ASSERT_FALSE("Should raise coproc exception here");
 }
diff --git a/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
new file mode 100644
index 00000000..e9c14725
--- /dev/null
+++ b/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
@@ -0,0 +1,454 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "dynarmic/backend/x64/a32_emit_x64.h" +#include "dynarmic/backend/x64/abi.h" +#include "dynarmic/backend/x64/devirtualize.h" +#include "dynarmic/backend/x64/perf_map.h" +#include "dynarmic/common/x64_disassemble.h" +#include "dynarmic/interface/exclusive_monitor.h" + +namespace Dynarmic::Backend::X64 { + +using namespace Xbyak::util; + +void A32EmitX64::GenFastmemFallbacks() { + const std::initializer_list idxes{0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; + const std::array, 4> read_callbacks{{ + {8, Devirtualize<&A32::UserCallbacks::MemoryRead8>(conf.callbacks)}, + {16, Devirtualize<&A32::UserCallbacks::MemoryRead16>(conf.callbacks)}, + {32, Devirtualize<&A32::UserCallbacks::MemoryRead32>(conf.callbacks)}, + {64, Devirtualize<&A32::UserCallbacks::MemoryRead64>(conf.callbacks)}, + }}; + const std::array, 4> write_callbacks{{ + {8, Devirtualize<&A32::UserCallbacks::MemoryWrite8>(conf.callbacks)}, + {16, Devirtualize<&A32::UserCallbacks::MemoryWrite16>(conf.callbacks)}, + {32, Devirtualize<&A32::UserCallbacks::MemoryWrite32>(conf.callbacks)}, + {64, Devirtualize<&A32::UserCallbacks::MemoryWrite64>(conf.callbacks)}, + }}; + + for (int vaddr_idx : idxes) { + for (int value_idx : idxes) { + for (const auto& [bitsize, callback] : read_callbacks) { + code.align(); + read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); + if (vaddr_idx != code.ABI_PARAM2.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + } + callback.EmitCall(code); + if (value_idx != code.ABI_RETURN.getIdx()) { + code.mov(Xbyak::Reg64{value_idx}, code.ABI_RETURN); + } + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); + code.ret(); + PerfMapRegister(read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a32_read_fallback_{}", bitsize)); + } + + for (const auto& [bitsize, callback] : write_callbacks) { + code.align(); + write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(code); + if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { + code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); + } else if (vaddr_idx == code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + if (value_idx != code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); + } + } else { + if (value_idx != code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); + } + if (vaddr_idx != code.ABI_PARAM2.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + } + } + callback.EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStack(code); + code.ret(); + PerfMapRegister(write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a32_write_fallback_{}", bitsize)); + } + } + } +} + +std::optional A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const { + if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) { + return std::nullopt; + } + + const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst)); + if (do_not_fastmem.count(marker) > 0) { + return std::nullopt; + } + return marker; +} + +FakeCall A32EmitX64::FastmemCallback(u64 rip_) { + const 
auto iter = fastmem_patch_info.find(rip_); + + if (iter == fastmem_patch_info.end()) { + fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); + fmt::print("Segfault wasn't at a fastmem patch location!\n"); + fmt::print("Now dumping code.......\n\n"); + Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); + ASSERT_FALSE("iter != fastmem_patch_info.end()"); + } + + if (conf.recompile_on_fastmem_failure) { + const auto marker = iter->second.marker; + do_not_fastmem.emplace(marker); + InvalidateBasicBlocks({std::get<0>(marker)}); + } + FakeCall ret; + ret.call_rip = iter->second.callback; + ret.ret_rip = iter->second.resume_rip; + return ret; +} + +namespace { + +constexpr size_t page_bits = 12; +constexpr size_t page_size = 1 << page_bits; +constexpr size_t page_mask = (1 << page_bits) - 1; + +void EmitDetectMisaignedVAddr(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg32 vaddr, Xbyak::Reg32 tmp) { + if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) { + return; + } + + const u32 align_mask = [bitsize]() -> u32 { + switch (bitsize) { + case 16: + return 0b1; + case 32: + return 0b11; + case 64: + return 0b111; + } + UNREACHABLE(); + }(); + + code.test(vaddr, align_mask); + + if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { + code.jnz(abort, code.T_NEAR); + return; + } + + const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; + + Xbyak::Label detect_boundary, resume; + + code.jnz(detect_boundary, code.T_NEAR); + code.L(resume); + + code.SwitchToFarCode(); + code.L(detect_boundary); + code.mov(tmp, vaddr); + code.and_(tmp, page_align_mask); + code.cmp(tmp, page_align_mask); + code.jne(resume, code.T_NEAR); + // NOTE: We expect to fallthrough into abort code here. + code.SwitchToNearCode(); +} + +Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) { + const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32(); + + EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr.cvt32(), tmp); + + // TODO: This code assumes vaddr has been zext from 32-bits to 64-bits. 
+ + code.mov(tmp, vaddr.cvt32()); + code.shr(tmp, static_cast(page_bits)); + code.mov(page, qword[r14 + tmp.cvt64() * sizeof(void*)]); + if (ctx.conf.page_table_pointer_mask_bits == 0) { + code.test(page, page); + } else { + code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits); + } + code.jz(abort, code.T_NEAR); + if (ctx.conf.absolute_offset_page_table) { + return page + vaddr; + } + code.mov(tmp, vaddr.cvt32()); + code.and_(tmp, static_cast(page_mask)); + return page + tmp.cvt64(); +} + +template +void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) { + switch (bitsize) { + case 8: + code.movzx(value.cvt32(), code.byte[addr]); + return; + case 16: + code.movzx(value.cvt32(), word[addr]); + return; + case 32: + code.mov(value.cvt32(), dword[addr]); + return; + case 64: + code.mov(value, qword[addr]); + return; + default: + ASSERT_FALSE("Invalid bitsize"); + } +} + +template +void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) { + switch (bitsize) { + case 8: + code.mov(code.byte[addr], value.cvt8()); + return; + case 16: + code.mov(word[addr], value.cvt16()); + return; + case 32: + code.mov(dword[addr], value.cvt32()); + return; + case 64: + code.mov(qword[addr], value); + return; + default: + ASSERT_FALSE("Invalid bitsize"); + } +} + +} // anonymous namespace + +template +void A32EmitX64::EmitMemoryRead(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto fastmem_marker = ShouldFastmem(ctx, inst); + + if (!conf.page_table && !fastmem_marker) { + // Neither fastmem nor page table: Use callbacks + ctx.reg_alloc.HostCall(inst, {}, args[0]); + Devirtualize(conf.callbacks).EmitCall(code); + return; + } + + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(); + + const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; + + if (fastmem_marker) { + // Use fastmem + const auto src_ptr = r13 + vaddr; + + const auto location = code.getCurr(); + EmitReadMemoryMov(code, value, src_ptr); + + fastmem_patch_info.emplace( + Common::BitCast(location), + FastmemPatchInfo{ + Common::BitCast(code.getCurr()), + Common::BitCast(wrapped_fn), + *fastmem_marker, + }); + + ctx.reg_alloc.DefineValue(inst, value); + return; + } + + // Use page table + ASSERT(conf.page_table); + Xbyak::Label abort, end; + + const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); + EmitReadMemoryMov(code, value, src_ptr); + code.L(end); + + code.SwitchToFarCode(); + code.L(abort); + code.call(wrapped_fn); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); + + ctx.reg_alloc.DefineValue(inst, value); +} + +template +void A32EmitX64::EmitMemoryWrite(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto fastmem_marker = ShouldFastmem(ctx, inst); + + if (!conf.page_table && !fastmem_marker) { + // Neither fastmem nor page table: Use callbacks + ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); + Devirtualize(conf.callbacks).EmitCall(code); + return; + } + + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); + + const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; + + if (fastmem_marker) { + // Use fastmem + const auto dest_ptr = r13 + vaddr; + + const auto location = code.getCurr(); + 
EmitWriteMemoryMov(code, dest_ptr, value); + + fastmem_patch_info.emplace( + Common::BitCast(location), + FastmemPatchInfo{ + Common::BitCast(code.getCurr()), + Common::BitCast(wrapped_fn), + *fastmem_marker, + }); + + return; + } + + // Use page table + ASSERT(conf.page_table); + Xbyak::Label abort, end; + + const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); + EmitWriteMemoryMov(code, dest_ptr, value); + code.L(end); + + code.SwitchToFarCode(); + code.L(abort); + code.call(wrapped_fn); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); +} + +void A32EmitX64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryRead<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst); +} + +void A32EmitX64::EmitA32ReadMemory16(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryRead<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst); +} + +void A32EmitX64::EmitA32ReadMemory32(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryRead<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst); +} + +void A32EmitX64::EmitA32ReadMemory64(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryRead<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst); +} + +void A32EmitX64::EmitA32WriteMemory8(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryWrite<8, &A32::UserCallbacks::MemoryWrite8>(ctx, inst); +} + +void A32EmitX64::EmitA32WriteMemory16(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryWrite<16, &A32::UserCallbacks::MemoryWrite16>(ctx, inst); +} + +void A32EmitX64::EmitA32WriteMemory32(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryWrite<32, &A32::UserCallbacks::MemoryWrite32>(ctx, inst); +} + +void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { + EmitMemoryWrite<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst); +} + +template +void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) { + using T = mp::unsigned_integer_of_size; + + ASSERT(conf.global_monitor != nullptr); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + ctx.reg_alloc.HostCall(inst, {}, args[0]); + + code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1)); + code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + code.CallLambda( + [](A32::UserConfig& conf, u32 vaddr) -> T { + return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { + return (conf.callbacks->*callback)(vaddr); + }); + }); +} + +template +void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) { + using T = mp::unsigned_integer_of_size; + + ASSERT(conf.global_monitor != nullptr); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); + + Xbyak::Label end; + + code.mov(code.ABI_RETURN, u32(1)); + code.cmp(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); + code.je(end); + code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); + code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + code.CallLambda( + [](A32::UserConfig& conf, u32 vaddr, T value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, + [&](T expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) + ? 
0
+                : 1;
+        });
+    code.L(end);
+}
+
+void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
+    code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
+}
+
+void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveReadMemory<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
+}
+
+void A32EmitX64::EmitA32ExclusiveReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveReadMemory<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
+}
+
+void A32EmitX64::EmitA32ExclusiveReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveReadMemory<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
+}
+
+void A32EmitX64::EmitA32ExclusiveReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveReadMemory<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
+}
+
+void A32EmitX64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
+}
+
+void A32EmitX64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
+}
+
+void A32EmitX64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
+}
+
+void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
+}
+
+}  // namespace Dynarmic::Backend::X64
diff --git a/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/backend/x64/a64_emit_x64.cpp
index 8de3ee5f..a4d36aa9 100644
--- a/src/dynarmic/backend/x64/a64_emit_x64.cpp
+++ b/src/dynarmic/backend/x64/a64_emit_x64.cpp
@@ -5,8 +5,6 @@
 #include "dynarmic/backend/x64/a64_emit_x64.h"
-#include
-
 #include
 #include
 #include
@@ -23,10 +21,8 @@
 #include "dynarmic/common/bit_util.h"
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/common/scope_exit.h"
-#include "dynarmic/common/x64_disassemble.h"
 #include "dynarmic/frontend/A64/a64_location_descriptor.h"
 #include "dynarmic/frontend/A64/a64_types.h"
-#include "dynarmic/interface/exclusive_monitor.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/cond.h"
 #include "dynarmic/ir/microinstruction.h"
@@ -156,155 +152,6 @@ void A64EmitX64::ClearFastDispatchTable() {
     }
 }
 
-void A64EmitX64::GenMemory128Accessors() {
-    code.align();
-    memory_read_128 = code.getCurr();
-#ifdef _WIN32
-    Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) {
-        code.mov(code.ABI_PARAM3, code.ABI_PARAM2);
-        code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
-        code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]);
-    });
-    code.movups(xmm1, xword[code.ABI_RETURN]);
-    code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE);
-#else
-    code.sub(rsp, 8);
-    Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCall(code);
-    if (code.HasHostFeature(HostFeature::SSE41)) {
-        code.movq(xmm1, code.ABI_RETURN);
-        code.pinsrq(xmm1, code.ABI_RETURN2, 1);
-    } else {
-        code.movq(xmm1, code.ABI_RETURN);
-        code.movq(xmm2, code.ABI_RETURN2);
-        code.punpcklqdq(xmm1, xmm2);
-    }
-    code.add(rsp, 8);
-#endif
-    code.ret();
-    PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128");
-
-    code.align();
-    memory_write_128 = code.getCurr();
-#ifdef _WIN32
-    code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
-
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); - code.movaps(xword[code.ABI_PARAM3], xmm1); - Devirtualize<&A64::UserCallbacks::MemoryWrite128>(conf.callbacks).EmitCall(code); - code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); -#else - code.sub(rsp, 8); - if (code.HasHostFeature(HostFeature::SSE41)) { - code.movq(code.ABI_PARAM3, xmm1); - code.pextrq(code.ABI_PARAM4, xmm1, 1); - } else { - code.movq(code.ABI_PARAM3, xmm1); - code.punpckhqdq(xmm1, xmm1); - code.movq(code.ABI_PARAM4, xmm1); - } - Devirtualize<&A64::UserCallbacks::MemoryWrite128>(conf.callbacks).EmitCall(code); - code.add(rsp, 8); -#endif - code.ret(); - PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_write_128"); -} - -void A64EmitX64::GenFastmemFallbacks() { - const std::initializer_list idxes{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - const std::array, 4> read_callbacks{{ - {8, Devirtualize<&A64::UserCallbacks::MemoryRead8>(conf.callbacks)}, - {16, Devirtualize<&A64::UserCallbacks::MemoryRead16>(conf.callbacks)}, - {32, Devirtualize<&A64::UserCallbacks::MemoryRead32>(conf.callbacks)}, - {64, Devirtualize<&A64::UserCallbacks::MemoryRead64>(conf.callbacks)}, - }}; - const std::array, 4> write_callbacks{{ - {8, Devirtualize<&A64::UserCallbacks::MemoryWrite8>(conf.callbacks)}, - {16, Devirtualize<&A64::UserCallbacks::MemoryWrite16>(conf.callbacks)}, - {32, Devirtualize<&A64::UserCallbacks::MemoryWrite32>(conf.callbacks)}, - {64, Devirtualize<&A64::UserCallbacks::MemoryWrite64>(conf.callbacks)}, - }}; - - for (int vaddr_idx : idxes) { - if (vaddr_idx == 4 || vaddr_idx == 15) { - continue; - } - - for (int value_idx : idxes) { - code.align(); - read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr(); - ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); - if (vaddr_idx != code.ABI_PARAM2.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - } - code.call(memory_read_128); - if (value_idx != 1) { - code.movaps(Xbyak::Xmm{value_idx}, xmm1); - } - ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); - code.ret(); - PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128"); - - code.align(); - write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr(); - ABI_PushCallerSaveRegistersAndAdjustStack(code); - if (vaddr_idx != code.ABI_PARAM2.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - } - if (value_idx != 1) { - code.movaps(xmm1, Xbyak::Xmm{value_idx}); - } - code.call(memory_write_128); - ABI_PopCallerSaveRegistersAndAdjustStack(code); - code.ret(); - PerfMapRegister(write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_write_fallback_128"); - - if (value_idx == 4 || value_idx == 15) { - continue; - } - - for (const auto& [bitsize, callback] : read_callbacks) { - code.align(); - read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); - ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); - if (vaddr_idx != code.ABI_PARAM2.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - } - callback.EmitCall(code); - if (value_idx != code.ABI_RETURN.getIdx()) { - code.mov(Xbyak::Reg64{value_idx}, code.ABI_RETURN); - } - ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); - code.ret(); - PerfMapRegister(read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), 
fmt::format("a64_read_fallback_{}", bitsize)); - } - - for (const auto& [bitsize, callback] : write_callbacks) { - code.align(); - write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); - ABI_PushCallerSaveRegistersAndAdjustStack(code); - if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { - code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); - } else if (vaddr_idx == code.ABI_PARAM3.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - if (value_idx != code.ABI_PARAM3.getIdx()) { - code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); - } - } else { - if (value_idx != code.ABI_PARAM3.getIdx()) { - code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); - } - if (vaddr_idx != code.ABI_PARAM2.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - } - } - callback.EmitCall(code); - ABI_PopCallerSaveRegistersAndAdjustStack(code); - code.ret(); - PerfMapRegister(write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_write_fallback_{}", bitsize)); - } - } - } -} - void A64EmitX64::GenTerminalHandlers() { // PC ends up in rbp, location_descriptor ends up in rbx const auto calculate_location_descriptor = [this] { @@ -742,600 +589,6 @@ void A64EmitX64::EmitA64SetTPIDR(A64EmitContext& ctx, IR::Inst* inst) { } } -void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) { - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); -} - -std::optional A64EmitX64::ShouldFastmem(A64EmitContext& ctx, IR::Inst* inst) const { - if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) { - return std::nullopt; - } - - const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst)); - if (do_not_fastmem.count(marker) > 0) { - return std::nullopt; - } - return marker; -} - -FakeCall A64EmitX64::FastmemCallback(u64 rip_) { - const auto iter = fastmem_patch_info.find(rip_); - - if (iter == fastmem_patch_info.end()) { - fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); - fmt::print("Segfault wasn't at a fastmem patch location!\n"); - fmt::print("Now dumping code.......\n\n"); - Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); - ASSERT_FALSE("iter != fastmem_patch_info.end()"); - } - - if (conf.recompile_on_fastmem_failure) { - const auto marker = iter->second.marker; - do_not_fastmem.emplace(marker); - InvalidateBasicBlocks({std::get<0>(marker)}); - } - FakeCall ret; - ret.call_rip = iter->second.callback; - ret.ret_rip = iter->second.resume_rip; - return ret; -} - -namespace { - -constexpr size_t page_bits = 12; -constexpr size_t page_size = 1 << page_bits; -constexpr size_t page_mask = (1 << page_bits) - 1; - -void EmitDetectMisaignedVAddr(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) { - if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) { - return; - } - - const u32 align_mask = [bitsize]() -> u32 { - switch (bitsize) { - case 16: - return 0b1; - case 32: - return 0b11; - case 64: - return 0b111; - case 128: - return 0b1111; - } - UNREACHABLE(); - }(); - - code.test(vaddr, align_mask); - - if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { - code.jnz(abort, code.T_NEAR); - return; - } - - const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; - - Xbyak::Label detect_boundary, resume; - - code.jnz(detect_boundary, code.T_NEAR); - code.L(resume); - - 
code.SwitchToFarCode(); - code.L(detect_boundary); - code.mov(tmp, vaddr); - code.and_(tmp, page_align_mask); - code.cmp(tmp, page_align_mask); - code.jne(resume, code.T_NEAR); - // NOTE: We expect to fallthrough into abort code here. - code.SwitchToNearCode(); -} - -Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) { - const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits; - const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits; - - const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr(); - - EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr, tmp); - - if (unused_top_bits == 0) { - code.mov(tmp, vaddr); - code.shr(tmp, int(page_bits)); - } else if (ctx.conf.silently_mirror_page_table) { - if (valid_page_index_bits >= 32) { - if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(); - code.mov(bit_count, unused_top_bits); - code.bzhi(tmp, vaddr, bit_count); - code.shr(tmp, int(page_bits)); - ctx.reg_alloc.Release(bit_count); - } else { - code.mov(tmp, vaddr); - code.shl(tmp, int(unused_top_bits)); - code.shr(tmp, int(unused_top_bits + page_bits)); - } - } else { - code.mov(tmp, vaddr); - code.shr(tmp, int(page_bits)); - code.and_(tmp, u32((1 << valid_page_index_bits) - 1)); - } - } else { - ASSERT(valid_page_index_bits < 32); - code.mov(tmp, vaddr); - code.shr(tmp, int(page_bits)); - code.test(tmp, u32(-(1 << valid_page_index_bits))); - code.jnz(abort, code.T_NEAR); - } - code.mov(page, qword[r14 + tmp * sizeof(void*)]); - if (ctx.conf.page_table_pointer_mask_bits == 0) { - code.test(page, page); - } else { - code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits); - } - code.jz(abort, code.T_NEAR); - if (ctx.conf.absolute_offset_page_table) { - return page + vaddr; - } - code.mov(tmp, vaddr); - code.and_(tmp, static_cast(page_mask)); - return page + tmp; -} - -Xbyak::RegExp EmitFastmemVAddr(BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, bool& require_abort_handling) { - const size_t unused_top_bits = 64 - ctx.conf.fastmem_address_space_bits; - - if (unused_top_bits == 0) { - return r13 + vaddr; - } else if (ctx.conf.silently_mirror_fastmem) { - Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - if (unused_top_bits < 32) { - code.mov(tmp, vaddr); - code.shl(tmp, int(unused_top_bits)); - code.shr(tmp, int(unused_top_bits)); - } else if (unused_top_bits == 32) { - code.mov(tmp.cvt32(), vaddr.cvt32()); - } else { - code.mov(tmp.cvt32(), vaddr.cvt32()); - code.and_(tmp, u32((1 << ctx.conf.fastmem_address_space_bits) - 1)); - } - return r13 + tmp; - } else { - if (ctx.conf.fastmem_address_space_bits < 32) { - code.test(vaddr, u32(-(1 << ctx.conf.fastmem_address_space_bits))); - code.jnz(abort, code.T_NEAR); - require_abort_handling = true; - } else { - // TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator - Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - code.mov(tmp, vaddr); - code.shr(tmp, int(ctx.conf.fastmem_address_space_bits)); - code.jnz(abort, code.T_NEAR); - require_abort_handling = true; - } - return r13 + vaddr; - } -} - -template -void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) { - switch (bitsize) { - case 8: - code.movzx(value.cvt32(), code.byte[addr]); - return; - case 16: - 
code.movzx(value.cvt32(), word[addr]); - return; - case 32: - code.mov(value.cvt32(), dword[addr]); - return; - case 64: - code.mov(value, qword[addr]); - return; - default: - ASSERT_FALSE("Invalid bitsize"); - } -} - -template -void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) { - switch (bitsize) { - case 8: - code.mov(code.byte[addr], value.cvt8()); - return; - case 16: - code.mov(word[addr], value.cvt16()); - return; - case 32: - code.mov(dword[addr], value.cvt32()); - return; - case 64: - code.mov(qword[addr], value); - return; - default: - ASSERT_FALSE("Invalid bitsize"); - } -} - -} // namespace - -template -void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - ctx.reg_alloc.HostCall(inst, {}, args[0]); - Devirtualize(conf.callbacks).EmitCall(code); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(); - - const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - Xbyak::Label abort, end; - bool require_abort_handling = false; - - if (fastmem_marker) { - // Use fastmem - const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); - - const auto location = code.getCurr(); - EmitReadMemoryMov(code, value, src_ptr); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - }); - } else { - // Use page table - ASSERT(conf.page_table); - const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - require_abort_handling = true; - EmitReadMemoryMov(code, value, src_ptr); - } - code.L(end); - - if (require_abort_handling) { - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } - - ctx.reg_alloc.DefineValue(inst, value); -} - -template -void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); - Devirtualize(conf.callbacks).EmitCall(code); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); - - const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - Xbyak::Label abort, end; - bool require_abort_handling = false; - - if (fastmem_marker) { - // Use fastmem - const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); - - const auto location = code.getCurr(); - EmitWriteMemoryMov(code, dest_ptr, value); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - }); - } else { - // Use page table - ASSERT(conf.page_table); - const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - require_abort_handling = true; - EmitWriteMemoryMov(code, dest_ptr, value); - } - code.L(end); - - if (require_abort_handling) { - code.SwitchToFarCode(); - 
code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } -} - -void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst); -} - -void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst); -} - -void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst); -} - -void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryRead<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst); -} - -void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - ctx.reg_alloc.HostCall(nullptr, {}, args[0]); - code.CallFunction(memory_read_128); - ctx.reg_alloc.DefineValue(inst, xmm1); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(); - - const auto wrapped_fn = read_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]; - - Xbyak::Label abort, end; - bool require_abort_handling = false; - - if (fastmem_marker) { - // Use fastmem - const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); - - const auto location = code.getCurr(); - code.movups(value, xword[src_ptr]); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - }); - } else { - // Use page table - ASSERT(conf.page_table); - const auto src_ptr = EmitVAddrLookup(code, ctx, 128, abort, vaddr); - require_abort_handling = true; - code.movups(value, xword[src_ptr]); - } - code.L(end); - - if (require_abort_handling) { - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } - - ctx.reg_alloc.DefineValue(inst, value); -} - -void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<8, &A64::UserCallbacks::MemoryWrite8>(ctx, inst); -} - -void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<16, &A64::UserCallbacks::MemoryWrite16>(ctx, inst); -} - -void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<32, &A64::UserCallbacks::MemoryWrite32>(ctx, inst); -} - -void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { - EmitMemoryWrite<64, &A64::UserCallbacks::MemoryWrite64>(ctx, inst); -} - -void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.Use(args[1], HostLoc::XMM1); - ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - code.CallFunction(memory_write_128); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]); - - const auto wrapped_fn = write_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]; - - 
Xbyak::Label abort, end; - bool require_abort_handling = false; - - if (fastmem_marker) { - // Use fastmem - const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); - - const auto location = code.getCurr(); - code.movups(xword[dest_ptr], value); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - }); - } else { - // Use page table - ASSERT(conf.page_table); - const auto dest_ptr = EmitVAddrLookup(code, ctx, 128, abort, vaddr); - require_abort_handling = true; - code.movups(xword[dest_ptr], value); - } - code.L(end); - - if (require_abort_handling) { - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } -} - -template -void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - if constexpr (bitsize != 128) { - using T = mp::unsigned_integer_of_size; - - ctx.reg_alloc.HostCall(inst, {}, args[0]); - - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr) -> T { - return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { - return (conf.callbacks->*callback)(vaddr); - }); - }); - } else { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); - code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) { - ret = conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> A64::Vector { - return (conf.callbacks->*callback)(vaddr); - }); - }); - code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); - ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); - - ctx.reg_alloc.DefineValue(inst, result); - } -} - -void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveReadMemory<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveReadMemory16(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveReadMemory<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveReadMemory32(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveReadMemory<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveReadMemory64(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveReadMemory<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveReadMemory<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst); -} - -template -void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - if constexpr (bitsize != 128) { - ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); - } else { - ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.Use(args[1], HostLoc::XMM1); - ctx.reg_alloc.EndOfAllocScope(); - 
ctx.reg_alloc.HostCall(inst); - } - - Xbyak::Label end; - - code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); - code.je(end); - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - if constexpr (bitsize != 128) { - using T = mp::unsigned_integer_of_size; - - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr, T value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](T expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); - } else { - ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); - code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); - code.movaps(xword[code.ABI_PARAM3], xmm1); - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](A64::Vector expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); - ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); - } - code.L(end); -} - -void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveWriteMemory<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveWriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveWriteMemory<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveWriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveWriteMemory<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveWriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveWriteMemory<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst); -} - -void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { - EmitExclusiveWriteMemory<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst); -} - std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const { const A64::LocationDescriptor descriptor{ir_descriptor}; return fmt::format("a64_{:016X}_fpcr{:08X}", diff --git a/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp new file mode 100644 index 00000000..c472a7c1 --- /dev/null +++ b/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp @@ -0,0 +1,694 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "dynarmic/backend/x64/a64_emit_x64.h" +#include "dynarmic/backend/x64/abi.h" +#include "dynarmic/backend/x64/devirtualize.h" +#include "dynarmic/backend/x64/perf_map.h" +#include "dynarmic/common/x64_disassemble.h" +#include "dynarmic/interface/exclusive_monitor.h" + +namespace Dynarmic::Backend::X64 { + +using namespace Xbyak::util; + +void A64EmitX64::GenMemory128Accessors() { + code.align(); + memory_read_128 = code.getCurr(); +#ifdef _WIN32 + Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCallWithReturnPointer(code, [&](Xbyak::Reg64 return_value_ptr, [[maybe_unused]] RegList args) { + code.mov(code.ABI_PARAM3, code.ABI_PARAM2); + code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); + code.lea(return_value_ptr, ptr[rsp + ABI_SHADOW_SPACE]); + }); + code.movups(xmm1, xword[code.ABI_RETURN]); + code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); +#else + code.sub(rsp, 8); + Devirtualize<&A64::UserCallbacks::MemoryRead128>(conf.callbacks).EmitCall(code); + if (code.HasHostFeature(HostFeature::SSE41)) { + code.movq(xmm1, code.ABI_RETURN); + code.pinsrq(xmm1, code.ABI_RETURN2, 1); + } else { + code.movq(xmm1, code.ABI_RETURN); + code.movq(xmm2, code.ABI_RETURN2); + code.punpcklqdq(xmm1, xmm2); + } + code.add(rsp, 8); +#endif + code.ret(); + PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128"); + + code.align(); + memory_write_128 = code.getCurr(); +#ifdef _WIN32 + code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); + code.movaps(xword[code.ABI_PARAM3], xmm1); + Devirtualize<&A64::UserCallbacks::MemoryWrite128>(conf.callbacks).EmitCall(code); + code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE); +#else + code.sub(rsp, 8); + if (code.HasHostFeature(HostFeature::SSE41)) { + code.movq(code.ABI_PARAM3, xmm1); + code.pextrq(code.ABI_PARAM4, xmm1, 1); + } else { + code.movq(code.ABI_PARAM3, xmm1); + code.punpckhqdq(xmm1, xmm1); + code.movq(code.ABI_PARAM4, xmm1); + } + Devirtualize<&A64::UserCallbacks::MemoryWrite128>(conf.callbacks).EmitCall(code); + code.add(rsp, 8); +#endif + code.ret(); + PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_write_128"); +} + +void A64EmitX64::GenFastmemFallbacks() { + const std::initializer_list idxes{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + const std::array, 4> read_callbacks{{ + {8, Devirtualize<&A64::UserCallbacks::MemoryRead8>(conf.callbacks)}, + {16, Devirtualize<&A64::UserCallbacks::MemoryRead16>(conf.callbacks)}, + {32, Devirtualize<&A64::UserCallbacks::MemoryRead32>(conf.callbacks)}, + {64, Devirtualize<&A64::UserCallbacks::MemoryRead64>(conf.callbacks)}, + }}; + const std::array, 4> write_callbacks{{ + {8, Devirtualize<&A64::UserCallbacks::MemoryWrite8>(conf.callbacks)}, + {16, Devirtualize<&A64::UserCallbacks::MemoryWrite16>(conf.callbacks)}, + {32, Devirtualize<&A64::UserCallbacks::MemoryWrite32>(conf.callbacks)}, + {64, Devirtualize<&A64::UserCallbacks::MemoryWrite64>(conf.callbacks)}, + }}; + + for (int vaddr_idx : idxes) { + if (vaddr_idx == 4 || vaddr_idx == 15) { + continue; + } + + for (int value_idx : idxes) { + code.align(); + read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); + if (vaddr_idx != code.ABI_PARAM2.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + } + 
code.call(memory_read_128); + if (value_idx != 1) { + code.movaps(Xbyak::Xmm{value_idx}, xmm1); + } + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx)); + code.ret(); + PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128"); + + code.align(); + write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(code); + if (vaddr_idx != code.ABI_PARAM2.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + } + if (value_idx != 1) { + code.movaps(xmm1, Xbyak::Xmm{value_idx}); + } + code.call(memory_write_128); + ABI_PopCallerSaveRegistersAndAdjustStack(code); + code.ret(); + PerfMapRegister(write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_write_fallback_128"); + + if (value_idx == 4 || value_idx == 15) { + continue; + } + + for (const auto& [bitsize, callback] : read_callbacks) { + code.align(); + read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); + if (vaddr_idx != code.ABI_PARAM2.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + } + callback.EmitCall(code); + if (value_idx != code.ABI_RETURN.getIdx()) { + code.mov(Xbyak::Reg64{value_idx}, code.ABI_RETURN); + } + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx)); + code.ret(); + PerfMapRegister(read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_read_fallback_{}", bitsize)); + } + + for (const auto& [bitsize, callback] : write_callbacks) { + code.align(); + write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(code); + if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { + code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); + } else if (vaddr_idx == code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + if (value_idx != code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); + } + } else { + if (value_idx != code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); + } + if (vaddr_idx != code.ABI_PARAM2.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + } + } + callback.EmitCall(code); + ABI_PopCallerSaveRegistersAndAdjustStack(code); + code.ret(); + PerfMapRegister(write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_write_fallback_{}", bitsize)); + } + } + } +} + +std::optional A64EmitX64::ShouldFastmem(A64EmitContext& ctx, IR::Inst* inst) const { + if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) { + return std::nullopt; + } + + const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst)); + if (do_not_fastmem.count(marker) > 0) { + return std::nullopt; + } + return marker; +} + +FakeCall A64EmitX64::FastmemCallback(u64 rip_) { + const auto iter = fastmem_patch_info.find(rip_); + + if (iter == fastmem_patch_info.end()) { + fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); + fmt::print("Segfault wasn't at a fastmem patch location!\n"); + fmt::print("Now dumping code.......\n\n"); + Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); + ASSERT_FALSE("iter != fastmem_patch_info.end()"); + } + + if (conf.recompile_on_fastmem_failure) { 
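+        // Blacklist this (location, instruction) marker and invalidate the block so it is
+        // recompiled without fastmem for this particular access.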
+ const auto marker = iter->second.marker; + do_not_fastmem.emplace(marker); + InvalidateBasicBlocks({std::get<0>(marker)}); + } + FakeCall ret; + ret.call_rip = iter->second.callback; + ret.ret_rip = iter->second.resume_rip; + return ret; +} + +namespace { + +constexpr size_t page_bits = 12; +constexpr size_t page_size = 1 << page_bits; +constexpr size_t page_mask = (1 << page_bits) - 1; + +void EmitDetectMisaignedVAddr(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) { + if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) { + return; + } + + const u32 align_mask = [bitsize]() -> u32 { + switch (bitsize) { + case 16: + return 0b1; + case 32: + return 0b11; + case 64: + return 0b111; + case 128: + return 0b1111; + } + UNREACHABLE(); + }(); + + code.test(vaddr, align_mask); + + if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { + code.jnz(abort, code.T_NEAR); + return; + } + + const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; + + Xbyak::Label detect_boundary, resume; + + code.jnz(detect_boundary, code.T_NEAR); + code.L(resume); + + code.SwitchToFarCode(); + code.L(detect_boundary); + code.mov(tmp, vaddr); + code.and_(tmp, page_align_mask); + code.cmp(tmp, page_align_mask); + code.jne(resume, code.T_NEAR); + // NOTE: We expect to fallthrough into abort code here. + code.SwitchToNearCode(); +} + +Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) { + const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits; + const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits; + + const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? 
page : ctx.reg_alloc.ScratchGpr(); + + EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr, tmp); + + if (unused_top_bits == 0) { + code.mov(tmp, vaddr); + code.shr(tmp, int(page_bits)); + } else if (ctx.conf.silently_mirror_page_table) { + if (valid_page_index_bits >= 32) { + if (code.HasHostFeature(HostFeature::BMI2)) { + const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(); + code.mov(bit_count, unused_top_bits); + code.bzhi(tmp, vaddr, bit_count); + code.shr(tmp, int(page_bits)); + ctx.reg_alloc.Release(bit_count); + } else { + code.mov(tmp, vaddr); + code.shl(tmp, int(unused_top_bits)); + code.shr(tmp, int(unused_top_bits + page_bits)); + } + } else { + code.mov(tmp, vaddr); + code.shr(tmp, int(page_bits)); + code.and_(tmp, u32((1 << valid_page_index_bits) - 1)); + } + } else { + ASSERT(valid_page_index_bits < 32); + code.mov(tmp, vaddr); + code.shr(tmp, int(page_bits)); + code.test(tmp, u32(-(1 << valid_page_index_bits))); + code.jnz(abort, code.T_NEAR); + } + code.mov(page, qword[r14 + tmp * sizeof(void*)]); + if (ctx.conf.page_table_pointer_mask_bits == 0) { + code.test(page, page); + } else { + code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits); + } + code.jz(abort, code.T_NEAR); + if (ctx.conf.absolute_offset_page_table) { + return page + vaddr; + } + code.mov(tmp, vaddr); + code.and_(tmp, static_cast(page_mask)); + return page + tmp; +} + +Xbyak::RegExp EmitFastmemVAddr(BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, bool& require_abort_handling) { + const size_t unused_top_bits = 64 - ctx.conf.fastmem_address_space_bits; + + if (unused_top_bits == 0) { + return r13 + vaddr; + } else if (ctx.conf.silently_mirror_fastmem) { + Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + if (unused_top_bits < 32) { + code.mov(tmp, vaddr); + code.shl(tmp, int(unused_top_bits)); + code.shr(tmp, int(unused_top_bits)); + } else if (unused_top_bits == 32) { + code.mov(tmp.cvt32(), vaddr.cvt32()); + } else { + code.mov(tmp.cvt32(), vaddr.cvt32()); + code.and_(tmp, u32((1 << ctx.conf.fastmem_address_space_bits) - 1)); + } + return r13 + tmp; + } else { + if (ctx.conf.fastmem_address_space_bits < 32) { + code.test(vaddr, u32(-(1 << ctx.conf.fastmem_address_space_bits))); + code.jnz(abort, code.T_NEAR); + require_abort_handling = true; + } else { + // TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator + Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + code.mov(tmp, vaddr); + code.shr(tmp, int(ctx.conf.fastmem_address_space_bits)); + code.jnz(abort, code.T_NEAR); + require_abort_handling = true; + } + return r13 + vaddr; + } +} + +template +void EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr) { + switch (bitsize) { + case 8: + code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]); + return; + case 16: + code.movzx(Xbyak::Reg32{value_idx}, word[addr]); + return; + case 32: + code.mov(Xbyak::Reg32{value_idx}, dword[addr]); + return; + case 64: + code.mov(Xbyak::Reg64{value_idx}, qword[addr]); + return; + case 128: + code.movups(Xbyak::Xmm{value_idx}, xword[addr]); + return; + default: + ASSERT_FALSE("Invalid bitsize"); + } +} + +template +void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int value_idx) { + switch (bitsize) { + case 8: + code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); + return; + case 16: + code.mov(word[addr], Xbyak::Reg16{value_idx}); + return; + case 32: + code.mov(dword[addr], Xbyak::Reg32{value_idx}); + return; + case 64: + 
code.mov(qword[addr], Xbyak::Reg64{value_idx});
+        return;
+    case 128:
+        code.movups(xword[addr], Xbyak::Xmm{value_idx});
+        return;
+    default:
+        ASSERT_FALSE("Invalid bitsize");
+    }
+}
+
+} // namespace
+
+template<std::size_t bitsize, auto callback>
+void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const auto fastmem_marker = ShouldFastmem(ctx, inst);
+
+    if (!conf.page_table && !fastmem_marker) {
+        // Neither fastmem nor page table: Use callbacks
+        if constexpr (bitsize == 128) {
+            ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
+            code.CallFunction(memory_read_128);
+            ctx.reg_alloc.DefineValue(inst, xmm1);
+        } else {
+            ctx.reg_alloc.HostCall(inst, {}, args[0]);
+            Devirtualize<callback>(conf.callbacks).EmitCall(code);
+        }
+        return;
+    }
+
+    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
+
+    const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)];
+
+    Xbyak::Label abort, end;
+    bool require_abort_handling = false;
+
+    if (fastmem_marker) {
+        // Use fastmem
+        const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
+
+        const auto location = code.getCurr();
+        EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr);
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *fastmem_marker,
+            });
+    } else {
+        // Use page table
+        ASSERT(conf.page_table);
+        const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
+        require_abort_handling = true;
+        EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr);
+    }
+    code.L(end);
+
+    if (require_abort_handling) {
+        code.SwitchToFarCode();
+        code.L(abort);
+        code.call(wrapped_fn);
+        code.jmp(end, code.T_NEAR);
+        code.SwitchToNearCode();
+    }
+
+    if constexpr (bitsize == 128) {
+        ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
+    } else {
+        ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
+    }
+}
+
+template<std::size_t bitsize, auto callback>
+void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const auto fastmem_marker = ShouldFastmem(ctx, inst);
+
+    if (!conf.page_table && !fastmem_marker) {
+        // Neither fastmem nor page table: Use callbacks
+        if constexpr (bitsize == 128) {
+            ctx.reg_alloc.Use(args[0], ABI_PARAM2);
+            ctx.reg_alloc.Use(args[1], HostLoc::XMM1);
+            ctx.reg_alloc.EndOfAllocScope();
+            ctx.reg_alloc.HostCall(nullptr);
+            code.CallFunction(memory_write_128);
+        } else {
+            ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
+            Devirtualize<callback>(conf.callbacks).EmitCall(code);
+        }
+        return;
+    }
+
+    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    const int value_idx = bitsize == 128 ? ctx.reg_alloc.UseXmm(args[1]).getIdx() : ctx.reg_alloc.UseGpr(args[1]).getIdx();
+
+    const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)];
+
+    Xbyak::Label abort, end;
+    bool require_abort_handling = false;
+
+    if (fastmem_marker) {
+        // Use fastmem
+        const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
+
+        const auto location = code.getCurr();
+        EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx);
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *fastmem_marker,
+            });
+    } else {
+        // Use page table
+        ASSERT(conf.page_table);
+        const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
+        require_abort_handling = true;
+        EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx);
+    }
+    code.L(end);
+
+    if (require_abort_handling) {
+        code.SwitchToFarCode();
+        code.L(abort);
+        code.call(wrapped_fn);
+        code.jmp(end, code.T_NEAR);
+        code.SwitchToNearCode();
+    }
+}
+
+void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryRead<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryRead<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryRead<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryRead<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryRead<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryWrite<8, &A64::UserCallbacks::MemoryWrite8>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryWrite<16, &A64::UserCallbacks::MemoryWrite16>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryWrite<32, &A64::UserCallbacks::MemoryWrite32>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryWrite<64, &A64::UserCallbacks::MemoryWrite64>(ctx, inst);
+}
+
+void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
+    EmitMemoryWrite<128, &A64::UserCallbacks::MemoryWrite128>(ctx, inst);
+}
+
+template<std::size_t bitsize, auto callback>
+void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(conf.global_monitor != nullptr);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    if constexpr (bitsize != 128) {
+        using T = mp::unsigned_integer_of_size<bitsize>;
+
+        ctx.reg_alloc.HostCall(inst, {}, args[0]);
+
+        code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+        code.CallLambda(
+            [](A64::UserConfig& conf, u64 vaddr) -> T {
+                return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
+                    return (conf.callbacks->*callback)(vaddr);
+                });
+            });
+    } else {
+        const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+        ctx.reg_alloc.Use(args[0], ABI_PARAM2);
+        ctx.reg_alloc.EndOfAllocScope();
+        ctx.reg_alloc.HostCall(nullptr);
+
+        code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+        ctx.reg_alloc.AllocStackSpace(16 +
ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); + code.CallLambda( + [](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) { + ret = conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> A64::Vector { + return (conf.callbacks->*callback)(vaddr); + }); + }); + code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); + ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); + + ctx.reg_alloc.DefineValue(inst, result); + } +} + +template +void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) { + ASSERT(conf.global_monitor != nullptr); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + if constexpr (bitsize != 128) { + ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); + } else { + ctx.reg_alloc.Use(args[0], ABI_PARAM2); + ctx.reg_alloc.Use(args[1], HostLoc::XMM1); + ctx.reg_alloc.EndOfAllocScope(); + ctx.reg_alloc.HostCall(inst); + } + + Xbyak::Label end; + + code.mov(code.ABI_RETURN, u32(1)); + code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.je(end); + code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + if constexpr (bitsize != 128) { + using T = mp::unsigned_integer_of_size; + + code.CallLambda( + [](A64::UserConfig& conf, u64 vaddr, T value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, + [&](T expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) + ? 0 + : 1; + }); + } else { + ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); + code.movaps(xword[code.ABI_PARAM3], xmm1); + code.CallLambda( + [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, + [&](A64::Vector expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) + ? 
0 + : 1; + }); + ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); + } + code.L(end); +} + +void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) { + code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); +} + +void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveReadMemory<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveReadMemory16(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveReadMemory<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveReadMemory32(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveReadMemory<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveReadMemory64(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveReadMemory<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveReadMemory<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveWriteMemory<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveWriteMemory<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveWriteMemory<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveWriteMemory<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { + EmitExclusiveWriteMemory<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst); +} + +} // namespace Dynarmic::Backend::X64
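
For context, everything the new a64_emit_x64_memory.cpp emits is driven by fields of A64::UserConfig (read as conf.* / ctx.conf.* above). Below is a minimal sketch of a configuration that exercises the page-table, fastmem and exclusive-monitor paths. It is illustrative only and not part of this change: the field names are exactly the ones referenced in this diff, but the field types, header names and the Jit/ExclusiveMonitor construction shown here are assumptions to be checked against dynarmic/interface/A64/config.h.

    // Illustrative sketch, not part of this diff. Field names match those read by
    // a64_emit_x64_memory.cpp; field types and constructors are assumed.
    #include "dynarmic/interface/A64/a64.h"
    #include "dynarmic/interface/A64/config.h"
    #include "dynarmic/interface/exclusive_monitor.h"

    Dynarmic::A64::Jit MakeJit(Dynarmic::A64::UserCallbacks& cb,
                               void** page_table,   // assumed: 1 << (page_table_address_space_bits - 12) entries
                               void* fastmem_base,  // assumed: 1 << fastmem_address_space_bits bytes of mapped memory
                               Dynarmic::ExclusiveMonitor& monitor) {
        Dynarmic::A64::UserConfig conf{};
        conf.callbacks = &cb;            // MemoryRead*/MemoryWrite*/MemoryWriteExclusive* fallbacks
        conf.global_monitor = &monitor;  // used by EmitExclusiveReadMemory/EmitExclusiveWriteMemory
        conf.processor_id = 0;

        // Page-table path (EmitVAddrLookup); r14 holds this pointer at runtime.
        conf.page_table = page_table;
        conf.page_table_address_space_bits = 36;
        conf.silently_mirror_page_table = true;
        conf.absolute_offset_page_table = false;
        conf.page_table_pointer_mask_bits = 0;
        conf.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
        conf.only_detect_misalignment_via_page_table_on_page_boundary = true;

        // Fastmem path (EmitFastmemVAddr / FastmemCallback); r13 holds this pointer at runtime.
        conf.fastmem_pointer = fastmem_base;
        conf.fastmem_address_space_bits = 36;
        conf.silently_mirror_fastmem = true;
        conf.recompile_on_fastmem_failure = true;

        return Dynarmic::A64::Jit{conf};
    }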