diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp
index 2680c93c..065f2184 100644
--- a/src/dynarmic/backend/arm64/emit_arm64.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64.cpp
@@ -40,7 +40,7 @@ template<>
 void EmitIR<IR::Opcode::CallHostFunction>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(nullptr, args[1], args[2], args[3]);
+    ctx.reg_alloc.PrepareForCall(args[1], args[2], args[3]);
 
     code.MOV(Xscratch0, args[0].GetImmediateU64());
     code.BLR(Xscratch0);
 }
diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp
index f8fc5eee..3d43bcb7 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp
@@ -555,7 +555,7 @@ void EmitIR(oaknut::CodeGenerator&
 template<>
 void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(nullptr);
+    ctx.reg_alloc.PrepareForCall();
 
     if (ctx.conf.enable_cycle_counting) {
         code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
@@ -576,7 +576,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont
 template<>
 void EmitIR<IR::Opcode::A32ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(nullptr);
+    ctx.reg_alloc.PrepareForCall();
 
     if (ctx.conf.enable_cycle_counting) {
         code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
@@ -611,7 +611,7 @@ void EmitIR(oaknut::CodeGenera
         return;
     }
 
-    ctx.reg_alloc.PrepareForCall(nullptr);
+    ctx.reg_alloc.PrepareForCall();
 
     EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
 }
diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp
index 6f2f75c7..746757e9 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a32_coprocessor.cpp
@@ -24,7 +24,7 @@ static void EmitCoprocessorException() {
 }
 
 static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
-    ctx.reg_alloc.PrepareForCall(inst, {}, arg0, arg1);
+    const auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, arg0, arg1);
 
     if (callback.user_arg) {
         code.MOV(X0, reinterpret_cast<u64>(*callback.user_arg));
@@ -32,6 +32,7 @@ static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A3
 
     code.MOV(Xscratch0, reinterpret_cast<u64>(callback.function));
     code.BLR(Xscratch0);
+    code.MOV(Xresult, X0);
 }
 
 template<>
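A note on the pattern being applied throughout: `PrepareForCall` previously bound the result of `inst` to the ABI return register as a side effect, so the result's location was fixed the moment the call was prepared. The new `PrepareForCallReg`/`PrepareForCallVec` variants hand the destination register back to the call site, which then copies the return value out explicitly. A minimal before/after sketch of a scalar call site, condensed from the `CallCoprocCallback` hunk above (`arg0`/`arg1` stand in for whatever the call site forwards):

```cpp
// Before: the allocator silently defined inst's result as X0.
ctx.reg_alloc.PrepareForCall(inst, {}, arg0, arg1);
code.BLR(Xscratch0);            // result is left in X0 per AAPCS64

// After: the destination register is explicit, and so is the copy.
const auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, arg0, arg1);
code.BLR(Xscratch0);
code.MOV(Xresult, X0);          // move the host call's result into place
```

Since `PrepareForCallReg` currently returns X0 itself (see the reg_alloc.cpp hunk at the end), the trailing `MOV` is a register-to-register no-op today; the point of the scheme is that call sites no longer depend on which register the allocator chooses.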
diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp
index e0bf558c..1cf5f774 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a32_memory.cpp
@@ -25,18 +25,19 @@ static bool IsOrdered(IR::AccType acctype) {
 static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
+    auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]);
     const bool ordered = IsOrdered(args[2].GetImmediateAccType());
 
     EmitRelocation(code, ctx, fn);
     if (ordered) {
         code.DMB(oaknut::BarrierOp::ISH);
     }
+    code.MOV(Xresult, X0);
 }
 
 static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
+    auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]);
     const bool ordered = IsOrdered(args[2].GetImmediateAccType());
 
     code.MOV(Wscratch0, 1);
@@ -45,11 +46,12 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct
     if (ordered) {
         code.DMB(oaknut::BarrierOp::ISH);
     }
+    code.MOV(Xresult, X0);
 }
 
 static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
+    ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
     const bool ordered = IsOrdered(args[3].GetImmediateAccType());
 
     if (ordered) {
@@ -63,7 +65,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I
 
 static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
+    auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1], args[2]);
     const bool ordered = IsOrdered(args[3].GetImmediateAccType());
 
     oaknut::Label end;
@@ -79,6 +81,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c
         code.DMB(oaknut::BarrierOp::ISH);
     }
     code.l(end);
+    code.MOV(Xresult, X0);
 }
 
 template<>
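Worth calling out in the memory helpers: plain stores produce no IR result, so they drop down to the result-less `PrepareForCall`, while loads (which yield the loaded value) and exclusive stores (which yield a pass/fail status) request a destination register and fill it after the call. The two shapes, condensed from the hunks above:

```cpp
// Store: no IR result, so no destination register is requested.
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
EmitRelocation(code, ctx, fn);   // placeholder later linked to the host thunk

// Load / exclusive store: the host call leaves its result in X0,
// which is copied into the allocator-assigned register afterwards.
auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]);
EmitRelocation(code, ctx, fn);
code.MOV(Xresult, X0);
```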
diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
index 469fc5ea..00405a03 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
@@ -342,7 +342,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx,
 template<>
 void EmitIR<IR::Opcode::A64CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(nullptr);
+    ctx.reg_alloc.PrepareForCall();
 
     if (ctx.conf.enable_cycle_counting) {
         code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
@@ -363,7 +363,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCont
 template<>
 void EmitIR<IR::Opcode::A64ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(nullptr);
+    ctx.reg_alloc.PrepareForCall();
 
     if (ctx.conf.enable_cycle_counting) {
         code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
@@ -385,14 +385,14 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCon
 template<>
 void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(nullptr, {}, args[1], args[2]);
+    ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
     EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised);
 }
 
 template<>
 void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(nullptr, {}, args[0], args[1]);
+    ctx.reg_alloc.PrepareForCall({}, args[0], args[1]);
     EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised);
 }
 
@@ -412,7 +412,7 @@ void EmitIR(oaknut::CodeGenera
         return;
     }
 
-    ctx.reg_alloc.PrepareForCall(nullptr);
+    ctx.reg_alloc.PrepareForCall();
 
     EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
 }
@@ -426,8 +426,9 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext&
 template<>
 void EmitIR<IR::Opcode::A64GetCNTPCT>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     // FIXME: AddTicks / GetTicksRemaining
-    ctx.reg_alloc.PrepareForCall(inst);
+    auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst);
     EmitRelocation(code, ctx, LinkTarget::GetCNTPCT);
+    code.MOV(Xresult, X0);
 }
 
 template<>
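On the A64 side the same mechanics apply, with one addition visible in the next file: 128-bit loads get dedicated `EmitReadMemory128`/`EmitExclusiveReadMemory128` helpers built on `PrepareForCallVec`, because a U128 result comes back in Q0 rather than X0. The two result-copy idioms side by side (a sketch; `.B16()` is oaknut's 16-byte vector arrangement, so the `MOV` copies the full 128 bits):

```cpp
// Scalar result: returned in X0 under AAPCS64.
auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]);
EmitRelocation(code, ctx, fn);
code.MOV(Xresult, X0);

// 128-bit result: returned in Q0; copy all 16 bytes into the
// allocator-assigned vector register.
auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst, {}, args[1]);
EmitRelocation(code, ctx, fn);
code.MOV(Qresult.B16(), Q0.B16());
```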
diff --git a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp
index 38062956..55c185b7 100644
--- a/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp
+++ b/src/dynarmic/backend/arm64/emit_arm64_a64_memory.cpp
@@ -25,18 +25,31 @@ static bool IsOrdered(IR::AccType acctype) {
 static void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
+    auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]);
     const bool ordered = IsOrdered(args[2].GetImmediateAccType());
 
     EmitRelocation(code, ctx, fn);
     if (ordered) {
         code.DMB(oaknut::BarrierOp::ISH);
     }
+    code.MOV(Xresult, X0);
+}
+
+static void EmitReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst, {}, args[1]);
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+
+    EmitRelocation(code, ctx, fn);
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+    code.MOV(Qresult.B16(), Q0.B16());
 }
 
 static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1]);
+    auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1]);
     const bool ordered = IsOrdered(args[2].GetImmediateAccType());
 
     code.MOV(Wscratch0, 1);
@@ -45,11 +58,26 @@ static void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ct
     if (ordered) {
         code.DMB(oaknut::BarrierOp::ISH);
     }
+    code.MOV(Xresult, X0);
+}
+
+static void EmitExclusiveReadMemory128(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst, {}, args[1]);
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+
+    code.MOV(Wscratch0, 1);
+    code.STRB(Wscratch0, Xstate, offsetof(A64JitState, exclusive_state));
+    EmitRelocation(code, ctx, fn);
+    if (ordered) {
+        code.DMB(oaknut::BarrierOp::ISH);
+    }
+    code.MOV(Qresult.B16(), Q0.B16());
 }
 
 static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
+    ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
     const bool ordered = IsOrdered(args[3].GetImmediateAccType());
 
     if (ordered) {
@@ -63,7 +91,7 @@ static void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::I
 
 static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    ctx.reg_alloc.PrepareForCall(inst, {}, args[1], args[2]);
+    auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst, {}, args[1], args[2]);
     const bool ordered = IsOrdered(args[3].GetImmediateAccType());
 
     oaknut::Label end;
@@ -79,6 +107,7 @@ static void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& c
         code.DMB(oaknut::BarrierOp::ISH);
     }
     code.l(end);
+    code.MOV(Xresult, X0);
 }
 
 template<>
@@ -108,7 +137,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContex
 template<>
 void EmitIR<IR::Opcode::A64ReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    EmitReadMemory(code, ctx, inst, LinkTarget::ReadMemory128);
+    EmitReadMemory128(code, ctx, inst, LinkTarget::ReadMemory128);
 }
 
 template<>
@@ -133,7 +162,7 @@ void EmitIR(oaknut::CodeGenerator& code, E
 template<>
 void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    EmitExclusiveReadMemory(code, ctx, inst, LinkTarget::ExclusiveReadMemory128);
+    EmitExclusiveReadMemory128(code, ctx, inst, LinkTarget::ExclusiveReadMemory128);
 }
 
 template<>
diff --git a/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/backend/arm64/reg_alloc.cpp
index 2dff0080..e40216c9 100644
--- a/src/dynarmic/backend/arm64/reg_alloc.cpp
+++ b/src/dynarmic/backend/arm64/reg_alloc.cpp
@@ -138,7 +138,7 @@ bool RegAlloc::IsValueLive(IR::Inst* inst) const {
     return !!ValueLocation(inst);
 }
 
-void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
+void RegAlloc::PrepareForCall(std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
     fpsr_manager.Spill();
 
     SpillFlags();
@@ -180,14 +180,20 @@ void RegAlloc::PrepareForCall(IR::Inst* result, std::optional<Argument::copyable
             LoadCopyInto(args[i]->get().value, oaknut::XReg{i});
         }
     }
-
-    if (result) {
-        if (result->GetType() == IR::Type::U128) {
-            DefineAsRegister(result, Q0);
-        } else {
-            DefineAsRegister(result, X0);
-        }
-    }
+}
+
+oaknut::XReg RegAlloc::PrepareForCallReg(IR::Inst* result, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
+    PrepareForCall(arg0, arg1, arg2, arg3);
+    ASSERT(result && result->GetType() != IR::Type::U128);
+    DefineAsRegister(result, X0);
+    return X0;
+}
+
+oaknut::QReg RegAlloc::PrepareForCallVec(IR::Inst* result, std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
+    PrepareForCall(arg0, arg1, arg2, arg3);
+    ASSERT(result && result->GetType() == IR::Type::U128);
+    DefineAsRegister(result, Q8);
+    return Q8;
 }
 
 void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
diff --git a/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/backend/arm64/reg_alloc.h
index c8560e25..4d5c3fe7 100644
--- a/src/dynarmic/backend/arm64/reg_alloc.h
+++ b/src/dynarmic/backend/arm64/reg_alloc.h
@@ -271,11 +271,9 @@ public:
         }
     }
 
-    void PrepareForCall(IR::Inst* result = nullptr,
-                        std::optional<Argument::copyable_reference> arg0 = {},
-                        std::optional<Argument::copyable_reference> arg1 = {},
-                        std::optional<Argument::copyable_reference> arg2 = {},
-                        std::optional<Argument::copyable_reference> arg3 = {});
+    void PrepareForCall(std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {});
+    oaknut::XReg PrepareForCallReg(IR::Inst* result, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {});
+    oaknut::QReg PrepareForCallVec(IR::Inst* result, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {});
 
     void DefineAsExisting(IR::Inst* inst, Argument& arg);
     void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg);
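Two observations on the allocator split itself. First, the new variants assert the result type instead of branching on it, so a mismatched call site now fails loudly at JIT time rather than silently picking the wrong register class. Second, the vector variant defines the result in Q8 rather than Q0; Q0 doubles as the first argument/return register, so parking a live value there would conflict with setting up the next host call, and Q8 keeps the result clear of that call-clobbered set (the rationale is inferred, not stated in the diff). If a call site genuinely needed to dispatch on the result type, the removed behaviour is easy to recover in a wrapper; a hypothetical sketch, with `EmitHostCall` an invented name that is not part of this diff:

```cpp
// Hypothetical helper reproducing the removed type dispatch at the call site.
void EmitHostCall(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, LinkTarget fn) {
    if (inst->GetType() == IR::Type::U128) {
        const auto Qresult = ctx.reg_alloc.PrepareForCallVec(inst);
        EmitRelocation(code, ctx, fn);
        code.MOV(Qresult.B16(), Q0.B16());  // vector result arrives in Q0
    } else {
        const auto Xresult = ctx.reg_alloc.PrepareForCallReg(inst);
        EmitRelocation(code, ctx, fn);
        code.MOV(Xresult, X0);              // scalar result arrives in X0
    }
}
```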