diff --git a/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/backend/riscv64/a32_address_space.cpp
index 12ea5929..986ae4ca 100644
--- a/src/dynarmic/backend/riscv64/a32_address_space.cpp
+++ b/src/dynarmic/backend/riscv64/a32_address_space.cpp
@@ -82,9 +82,9 @@ void A32AddressSpace::EmitPrelude() {
         as.FSD(FPR{i}, (32 + i) * 8 + static_cast(sizeof(StackLayout)), sp);
     }
 
-    as.ADDI(Xstate, a1, 0);
-    as.ADDI(Xhalt, a2, 0);
-    as.JALR(x0, 0, a0);
+    as.MV(Xstate, a1);
+    as.MV(Xhalt, a2);
+    as.JR(a0);
 
     prelude_info.return_from_run_code = GetCursorPtr();
     for (u32 i = 1; i < 32; i += 1) {
@@ -116,7 +116,10 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
         ClearCache();
     }
 
-    EmittedBlockInfo block_info = EmitRV64(as, std::move(block), {});
+    EmittedBlockInfo block_info = EmitRV64(as, std::move(block), {
+        .enable_cycle_counting = conf.enable_cycle_counting,
+        .always_little_endian = conf.always_little_endian,
+    });
 
     Link(block_info);
     return block_info;
@@ -130,7 +133,7 @@ void A32AddressSpace::Link(EmittedBlockInfo& block_info) {
         switch (target) {
         case LinkTarget::ReturnFromRunCode: {
             std::ptrdiff_t off = prelude_info.return_from_run_code - reinterpret_cast(a.GetCursorPointer());
-            a.JAL(x0, off);
+            a.J(off);
             break;
         }
         default:
diff --git a/src/dynarmic/backend/riscv64/emit_riscv64.cpp b/src/dynarmic/backend/riscv64/emit_riscv64.cpp
index 22707e3a..1ac24b3e 100644
--- a/src/dynarmic/backend/riscv64/emit_riscv64.cpp
+++ b/src/dynarmic/backend/riscv64/emit_riscv64.cpp
@@ -54,7 +54,7 @@ void EmitIR(biscuit::Assembler& as, EmitContext& ctx, I
         as.SEQZ(Xnz, Xvalue);
         as.SLLI(Xnz, Xnz, 30);
-        as.SLT(Xscratch0, Xvalue, biscuit::zero);
+        as.SLTZ(Xscratch0, Xvalue);
         as.SLLI(Xscratch0, Xscratch0, 31);
         as.OR(Xnz, Xnz, Xscratch0);
     }
 
@@ -98,21 +98,27 @@ EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitCon
     reg_alloc.UpdateAllUses();
     reg_alloc.AssertNoMoreUses();
 
-    // TODO: Add Cycles
+    if (emit_conf.enable_cycle_counting) {
+        const size_t cycles_to_add = block.CycleCount();
+        as.LD(Xscratch0, offsetof(StackLayout, cycles_remaining), sp);
+        if (mcl::bit::sign_extend<12>(-cycles_to_add) == -cycles_to_add) {
+            as.ADDI(Xscratch0, Xscratch0, -cycles_to_add);
+        } else {
+            as.LI(Xscratch1, cycles_to_add);
+            as.SUB(Xscratch0, Xscratch0, Xscratch1);
+        }
+        as.SD(Xscratch0, offsetof(StackLayout, cycles_remaining), sp);
+    }
 
-    // TODO: Emit Terminal
-    const auto term = block.GetTerminal();
-    const IR::Term::LinkBlock* link_block_term = boost::get(&term);
-    ASSERT(link_block_term);
-    as.LI(Xscratch0, link_block_term->next.Value());
-    as.SD(Xscratch0, offsetof(A32JitState, regs) + sizeof(u32) * 15, Xstate);
-
-    ptrdiff_t offset = reinterpret_cast(as.GetCursorPointer()) - ebi.entry_point;
-    ebi.relocations.emplace_back(Relocation{offset, LinkTarget::ReturnFromRunCode});
-    as.NOP();
+    EmitA32Terminal(as, ctx);
 
     ebi.size = reinterpret_cast(as.GetCursorPointer()) - ebi.entry_point;
     return ebi;
 }
 
+void EmitRelocation(biscuit::Assembler& as, EmitContext& ctx, LinkTarget link_target) {
+    ctx.ebi.relocations.emplace_back(Relocation{reinterpret_cast(as.GetCursorPointer()) - ctx.ebi.entry_point, link_target});
+    as.NOP();
+}
+
 }  // namespace Dynarmic::Backend::RV64
diff --git a/src/dynarmic/backend/riscv64/emit_riscv64.h b/src/dynarmic/backend/riscv64/emit_riscv64.h
index c7bd1eeb..5cfd0212 100644
--- a/src/dynarmic/backend/riscv64/emit_riscv64.h
+++ b/src/dynarmic/backend/riscv64/emit_riscv64.h
@@ -7,6 +7,7 @@
 
 #include
 
+#include
 #include
 
 namespace biscuit {
@@ -15,8 +16,9 @@ class Assembler;
 
 namespace Dynarmic::IR {
 class Block;
-enum class Opcode;
 class Inst;
+enum class Cond;
+enum class Opcode;
 }  // namespace Dynarmic::IR
 
 namespace Dynarmic::Backend::RV64 {
@@ -38,13 +40,19 @@ struct EmittedBlockInfo {
     std::vector relocations;
 };
 
-struct EmitConfig {};
+struct EmitConfig {
+    bool enable_cycle_counting;
+    bool always_little_endian;
+};
 
 struct EmitContext;
 
-template
-void EmitIR(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
-
 EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf);
 
+template
+void EmitIR(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
+void EmitRelocation(biscuit::Assembler& as, EmitContext& ctx, LinkTarget link_target);
+void EmitA32Cond(biscuit::Assembler& as, EmitContext& ctx, IR::Cond cond, biscuit::Label* label);
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx);
+
 }  // namespace Dynarmic::Backend::RV64
diff --git a/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp b/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
index f22f520c..7787b4bb 100644
--- a/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
+++ b/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
@@ -17,6 +17,194 @@
 
 namespace Dynarmic::Backend::RV64 {
 
+void EmitA32Cond(biscuit::Assembler& as, EmitContext&, IR::Cond cond, biscuit::Label* label) {
+    as.LWU(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
+    as.SRLIW(Xscratch0, Xscratch0, 28);
+
+    switch (cond) {
+    case IR::Cond::EQ:
+        // Z == 1
+        as.ANDI(Xscratch0, Xscratch0, 0b0100);
+        as.BNEZ(Xscratch0, label);
+        break;
+    case IR::Cond::NE:
+        // Z == 0
+        as.ANDI(Xscratch0, Xscratch0, 0b0100);
+        as.BEQZ(Xscratch0, label);
+        break;
+    case IR::Cond::CS:
+        // C == 1
+        as.ANDI(Xscratch0, Xscratch0, 0b0010);
+        as.BNEZ(Xscratch0, label);
+        break;
+    case IR::Cond::CC:
+        // C == 0
+        as.ANDI(Xscratch0, Xscratch0, 0b0010);
+        as.BEQZ(Xscratch0, label);
+        break;
+    case IR::Cond::MI:
+        // N == 1
+        as.ANDI(Xscratch0, Xscratch0, 0b1000);
+        as.BNEZ(Xscratch0, label);
+        break;
+    case IR::Cond::PL:
+        // N == 0
+        as.ANDI(Xscratch0, Xscratch0, 0b1000);
+        as.BEQZ(Xscratch0, label);
+        break;
+    case IR::Cond::VS:
+        // V == 1
+        as.ANDI(Xscratch0, Xscratch0, 0b0001);
+        as.BNEZ(Xscratch0, label);
+        break;
+    case IR::Cond::VC:
+        // V == 0
+        as.ANDI(Xscratch0, Xscratch0, 0b0001);
+        as.BEQZ(Xscratch0, label);
+        break;
+    case IR::Cond::HI:
+        // Z == 0 && C == 1
+        as.ANDI(Xscratch0, Xscratch0, 0b0110);
+        as.ADDI(Xscratch1, biscuit::zero, 0b0010);
+        as.BEQ(Xscratch0, Xscratch1, label);
+        break;
+    case IR::Cond::LS:
+        // Z == 1 || C == 0
+        as.ANDI(Xscratch0, Xscratch0, 0b0110);
+        as.ADDI(Xscratch1, biscuit::zero, 0b0010);
+        as.BNE(Xscratch0, Xscratch1, label);
+        break;
+    case IR::Cond::GE:
+        // N == V
+        as.ANDI(Xscratch0, Xscratch0, 0b1001);
+        as.ADDI(Xscratch1, biscuit::zero, 0b1001);
+        as.BEQ(Xscratch0, Xscratch1, label);
+        as.BEQZ(Xscratch0, label);
+        break;
+    case IR::Cond::LT:
+        // N != V
+        as.ANDI(Xscratch0, Xscratch0, 0b1001);
+        as.ADDI(Xscratch1, biscuit::zero, 0b1000);
+        as.BEQ(Xscratch0, Xscratch1, label);
+        as.ADDI(Xscratch1, biscuit::zero, 0b0001);
+        as.BEQ(Xscratch0, Xscratch1, label);
+        break;
+    case IR::Cond::GT:
+        // Z == 0 && N == V
+        as.ANDI(Xscratch0, Xscratch0, 0b1101);
+        as.ADDI(Xscratch1, biscuit::zero, 0b1001);
+        as.BEQ(Xscratch0, Xscratch1, label);
+        as.BEQZ(Xscratch0, label);
+        break;
+    case IR::Cond::LE:
+        // Z == 1 || N != V
+        as.ANDI(Xscratch0, Xscratch0, 0b1101);
+        as.LI(Xscratch1, 0b11000100110010);
+        as.SRLW(Xscratch0, Xscratch1, Xscratch0);
+        as.ANDI(Xscratch0, Xscratch0, 1);
+        as.BNEZ(Xscratch0, label);
+        break;
+    default:
+        ASSERT_MSG(false, "Unknown cond {}", static_cast(cond));
+        break;
+    }
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
+
+void EmitA32Terminal(biscuit::Assembler&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
+    ASSERT_FALSE("Interpret should never be emitted.");
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
+    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
+}
+
+void EmitSetUpperLocationDescriptor(biscuit::Assembler& as, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
+    auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 {
+        return static_cast(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32);
+    };
+
+    const u32 old_upper = get_upper(old_location);
+    const u32 new_upper = [&] {
+        const u32 mask = ~u32(ctx.emit_conf.always_little_endian ? 0x2 : 0);
+        return get_upper(new_location) & mask;
+    }();
+
+    if (old_upper != new_upper) {
+        as.LI(Xscratch0, new_upper);
+        as.SW(Xscratch0, offsetof(A32JitState, upper_location_descriptor), Xstate);
+    }
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool) {
+    EmitSetUpperLocationDescriptor(as, ctx, terminal.next, initial_location);
+
+    as.LI(Xscratch0, terminal.next.Value());
+    as.SW(Xscratch0, offsetof(A32JitState, regs) + sizeof(u32) * 15, Xstate);
+    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
+
+    // TODO: Implement LinkBlock optimization
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool) {
+    EmitSetUpperLocationDescriptor(as, ctx, terminal.next, initial_location);
+
+    as.LI(Xscratch0, terminal.next.Value());
+    as.SW(Xscratch0, offsetof(A32JitState, regs) + sizeof(u32) * 15, Xstate);
+    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
+
+    // TODO: Implement LinkBlockFast optimization
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {
+    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
+
+    // TODO: Implement PopRSBHint optimization
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
+    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
+
+    // TODO: Implement FastDispatchHint optimization
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
+    biscuit::Label pass;
+    EmitA32Cond(as, ctx, terminal.if_, &pass);
+    EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);
+    as.Bind(&pass);
+    EmitA32Terminal(as, ctx, terminal.then_, initial_location, is_single_step);
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
+    biscuit::Label fail;
+    as.LBU(Xscratch0, offsetof(StackLayout, check_bit), Xstate);
+    as.BEQZ(Xscratch0, &fail);
+    EmitA32Terminal(as, ctx, terminal.then_, initial_location, is_single_step);
+    as.Bind(&fail);
+    EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
+    biscuit::Label fail;
+    as.LWU(Xscratch0, 0, Xhalt);
+    as.FENCE(biscuit::FenceOrder::RW, biscuit::FenceOrder::RW);
+    as.BNEZ(Xscratch0, &fail);
+    EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);
+    as.Bind(&fail);
+    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
+    boost::apply_visitor([&](const auto& t) { EmitA32Terminal(as, ctx, t, initial_location, is_single_step); }, terminal);
+}
+
+void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx) {
+    const A32::LocationDescriptor location{ctx.block.Location()};
+    EmitA32Terminal(as, ctx, ctx.block.GetTerminal(), location.SetSingleStepping(false), location.SingleStepping());
+}
+
 template<>
 void EmitIR(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
     const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
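
Side note on the cycle-counting hunk in emit_riscv64.cpp: RV64 ADDI takes a sign-extended 12-bit immediate, so the block-cycle subtraction can be folded into a single ADDI only when -cycles_to_add survives a 12-bit sign-extension round-trip; otherwise the patch falls back to LI + SUB. The following host-side check is illustrative only and is not part of the patch; sign_extend12 is a local stand-in for the assumed semantics of mcl::bit::sign_extend<12> (keep the low 12 bits, then sign-extend).

// check_addi_range.cpp -- illustrative sketch mirroring the condition
// `mcl::bit::sign_extend<12>(-cycles_to_add) == -cycles_to_add`.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Stand-in for the assumed behaviour of mcl::bit::sign_extend<12>.
constexpr std::uint64_t sign_extend12(std::uint64_t value) {
    const std::uint64_t low = value & 0xFFF;
    return (low & 0x800) ? (low | ~std::uint64_t{0xFFF}) : low;
}

// True when -cycles fits ADDI's signed 12-bit immediate range [-2048, 2047].
constexpr bool FitsInAddiImmediate(std::uint64_t cycles) {
    const std::uint64_t neg = 0 - cycles;  // two's-complement negate, as in the patch
    return sign_extend12(neg) == neg;
}

int main() {
    assert(FitsInAddiImmediate(0));
    assert(FitsInAddiImmediate(1));
    assert(FitsInAddiImmediate(2048));   // -2048 is the most negative 12-bit immediate
    assert(!FitsInAddiImmediate(2049));  // would require the LI + SUB fallback
    std::puts("ok");
}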
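Side note on the IR::Cond::LE case in EmitA32Cond: the masked NZCV nibble (N, Z, V kept; C cleared by the 0b1101 mask) is used as a shift amount into the constant 0b11000100110010, which acts as a 14-bit truth table for "Z == 1 || N != V". A quick host-side verification of that table, illustrative only and not part of the patch:

// check_le_table.cpp -- verifies the truth-table constant used for IR::Cond::LE.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::uint32_t table = 0b11000100110010;
    // nzcv is the CPSR's top nibble: bit 3 = N, bit 2 = Z, bit 1 = C, bit 0 = V.
    for (std::uint32_t nzcv = 0; nzcv < 16; ++nzcv) {
        const bool n = (nzcv >> 3) & 1;
        const bool z = (nzcv >> 2) & 1;
        const bool v = nzcv & 1;
        const bool le = z || (n != v);              // ARM LE condition
        const std::uint32_t index = nzcv & 0b1101;  // same mask the emitted ANDI applies
        const bool from_table = (table >> index) & 1;
        assert(from_table == le);
    }
    std::puts("LE table ok");
}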