jit_state: Split off CPSR.{E,T}
This allows us to improve code-emission for PopRSBHint. We also improve code emission other terminals at the same time.
This commit is contained in:
parent
3cca3bbd0b
commit
311361b409
5 changed files with 67 additions and 72 deletions
|
@ -26,10 +26,10 @@ computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
|
|||
u64 LocationDescriptor::UniqueHash() const {
|
||||
// This value MUST BE UNIQUE.
|
||||
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
|
||||
u64 pc_u64 = u64(arm_pc);
|
||||
u64 fpscr_u64 = u64(fpscr.Value()) << 32;
|
||||
u64 t_u64 = cpsr.T() ? (1ull << 35) : 0;
|
||||
u64 e_u64 = cpsr.E() ? (1ull << 39) : 0;
|
||||
u64 pc_u64 = u64(arm_pc) << 32;
|
||||
u64 fpscr_u64 = u64(fpscr.Value());
|
||||
u64 t_u64 = cpsr.T() ? 1 : 0;
|
||||
u64 e_u64 = cpsr.E() ? 2 : 0;
|
||||
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
|
||||
}
|
||||
|
||||
|
@ -120,12 +120,10 @@ To check if a predicition is in the RSB, we linearly scan the RSB.
|
|||
using namespace Xbyak::util;
|
||||
|
||||
// This calculation has to match up with IREmitter::PushRSB
|
||||
code->mov(ebx, MJitStateCpsr());
|
||||
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
|
||||
code->and_(ebx, u32((1 << 5) | (1 << 9)));
|
||||
code->shr(ebx, 2);
|
||||
code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
|
||||
code->shl(rbx, 32);
|
||||
code->shl(rcx, 32);
|
||||
code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
|
||||
code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
|
||||
code->or_(rbx, rcx);
|
||||
|
||||
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
|
||||
|
|
|
@ -362,12 +362,12 @@ void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
|
||||
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
auto args = reg_alloc.GetArgumentInfo(inst);
|
||||
auto& arg = args[0];
|
||||
|
||||
const u32 T_bit = 1 << 5;
|
||||
|
||||
// Pseudocode:
|
||||
// if (new_pc & 1) {
|
||||
// new_pc &= 0xFFFFFFFE;
|
||||
|
@ -376,36 +376,41 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
|
|||
// new_pc &= 0xFFFFFFFC;
|
||||
// cpsr.T = false;
|
||||
// }
|
||||
// We rely on the fact we disallow EFlag from changing within a block.
|
||||
|
||||
if (arg.IsImmediate()) {
|
||||
u32 new_pc = arg.GetImmediateU32();
|
||||
if (Common::Bit<0>(new_pc)) {
|
||||
new_pc &= 0xFFFFFFFE;
|
||||
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
|
||||
code->or_(MJitStateCpsr_other(), T_bit);
|
||||
} else {
|
||||
new_pc &= 0xFFFFFFFC;
|
||||
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
|
||||
code->and_(MJitStateCpsr_other(), ~T_bit);
|
||||
}
|
||||
} else {
|
||||
using Xbyak::util::ptr;
|
||||
u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
|
||||
u32 et = 0;
|
||||
et |= block.Location().EFlag() ? 2 : 0;
|
||||
et |= Common::Bit<0>(new_pc) ? 1 : 0;
|
||||
|
||||
code->mov(MJitStateReg(Arm::Reg::PC), new_pc & mask);
|
||||
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
|
||||
} else {
|
||||
if (block.Location().EFlag()) {
|
||||
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
|
||||
Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 et = reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
code->mov(tmp1, MJitStateCpsr_other());
|
||||
code->mov(tmp2, tmp1);
|
||||
code->and_(tmp2, u32(~T_bit)); // CPSR.T = 0
|
||||
code->or_(tmp1, u32(T_bit)); // CPSR.T = 1
|
||||
code->test(new_pc, u32(1));
|
||||
code->cmove(tmp1, tmp2); // CPSR.T = pc & 1
|
||||
code->mov(MJitStateCpsr_other(), tmp1);
|
||||
code->lea(tmp2, ptr[new_pc.cvt64() + new_pc.cvt64() * 1]);
|
||||
code->or_(tmp2, u32(0xFFFFFFFC)); // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
|
||||
code->and_(new_pc, tmp2);
|
||||
code->mov(mask, new_pc);
|
||||
code->and_(mask, 1);
|
||||
code->lea(et, ptr[mask.cvt64() + 2]);
|
||||
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
|
||||
code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
|
||||
code->and_(new_pc, mask);
|
||||
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
|
||||
} else {
|
||||
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
|
||||
Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
code->mov(mask, new_pc);
|
||||
code->and_(mask, 1);
|
||||
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], mask);
|
||||
code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
|
||||
code->and_(new_pc, mask);
|
||||
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3371,22 +3376,18 @@ void EmitX64::EmitTerminal(IR::Term::ReturnToDispatch, IR::LocationDescriptor) {
|
|||
code->ReturnFromRunCode();
|
||||
}
|
||||
|
||||
static u32 CalculateCpsr_et(const IR::LocationDescriptor& desc) {
|
||||
u32 et = 0;
|
||||
et |= desc.EFlag() ? 2 : 0;
|
||||
et |= desc.TFlag() ? 1 : 0;
|
||||
return et;
|
||||
}
|
||||
|
||||
void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
if (terminal.next.TFlag() != initial_location.TFlag()) {
|
||||
if (terminal.next.TFlag()) {
|
||||
code->or_(MJitStateCpsr_other(), u32(1 << 5));
|
||||
} else {
|
||||
code->and_(MJitStateCpsr_other(), u32(~(1 << 5)));
|
||||
}
|
||||
}
|
||||
if (terminal.next.EFlag() != initial_location.EFlag()) {
|
||||
if (terminal.next.EFlag()) {
|
||||
code->or_(MJitStateCpsr_other(), u32(1 << 9));
|
||||
} else {
|
||||
code->and_(MJitStateCpsr_other(), u32(~(1 << 9)));
|
||||
}
|
||||
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
|
||||
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
|
||||
}
|
||||
|
||||
code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0);
|
||||
|
@ -3412,19 +3413,8 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor
|
|||
void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
if (terminal.next.TFlag() != initial_location.TFlag()) {
|
||||
if (terminal.next.TFlag()) {
|
||||
code->or_(MJitStateCpsr_other(), u32(1 << 5));
|
||||
} else {
|
||||
code->and_(MJitStateCpsr_other(), u32(~(1 << 5)));
|
||||
}
|
||||
}
|
||||
if (terminal.next.EFlag() != initial_location.EFlag()) {
|
||||
if (terminal.next.EFlag()) {
|
||||
code->or_(MJitStateCpsr_other(), u32(1 << 9));
|
||||
} else {
|
||||
code->and_(MJitStateCpsr_other(), u32(~(1 << 9)));
|
||||
}
|
||||
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
|
||||
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
|
||||
}
|
||||
|
||||
patch_information[terminal.next.UniqueHash()].jmp.emplace_back(code->getCurr());
|
||||
|
@ -3439,12 +3429,11 @@ void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) {
|
|||
using namespace Xbyak::util;
|
||||
|
||||
// This calculation has to match up with IREmitter::PushRSB
|
||||
code->mov(ebx, MJitStateCpsr_other());
|
||||
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
|
||||
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
|
||||
code->and_(ebx, u32((1 << 5) | (1 << 9)));
|
||||
code->shr(ebx, 2);
|
||||
code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
|
||||
code->shl(rbx, 32);
|
||||
code->shl(rcx, 32);
|
||||
code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
|
||||
code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
|
||||
code->or_(rbx, rcx);
|
||||
|
||||
code->mov(eax, dword[r15 + offsetof(JitState, rsb_ptr)]);
|
||||
|
|
|
@ -52,6 +52,9 @@ u32 JitState::Cpsr() const {
|
|||
cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0;
|
||||
cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0;
|
||||
cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0;
|
||||
// E flag, T flag
|
||||
cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0;
|
||||
cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0;
|
||||
// Other flags
|
||||
cpsr |= CPSR_other;
|
||||
|
||||
|
@ -65,8 +68,12 @@ void JitState::SetCpsr(u32 cpsr) {
|
|||
CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
|
||||
CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
|
||||
CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
|
||||
// E flag, T flag
|
||||
CPSR_et = 0;
|
||||
CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0;
|
||||
CPSR_et |= Common::Bit<5>(cpsr) ? 1 : 0;
|
||||
// Other flags
|
||||
CPSR_other = cpsr & 0xFFF0FFFF;
|
||||
CPSR_other = cpsr & 0xFFF0FDDF;
|
||||
}
|
||||
|
||||
void JitState::ResetRSB() {
|
||||
|
|
|
@ -30,6 +30,7 @@ struct JitState {
|
|||
|
||||
u32 CPSR_other = 0;
|
||||
u32 CPSR_ge = 0;
|
||||
u32 CPSR_et = 0;
|
||||
u32 Cpsr() const;
|
||||
void SetCpsr(u32 cpsr);
|
||||
|
||||
|
|
|
@ -75,10 +75,10 @@ public:
|
|||
u64 UniqueHash() const {
|
||||
// This value MUST BE UNIQUE.
|
||||
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
|
||||
u64 pc_u64 = u64(arm_pc);
|
||||
u64 fpscr_u64 = u64(fpscr.Value()) << 32;
|
||||
u64 t_u64 = cpsr.T() ? (1ull << 35) : 0;
|
||||
u64 e_u64 = cpsr.E() ? (1ull << 39) : 0;
|
||||
u64 pc_u64 = u64(arm_pc) << 32;
|
||||
u64 fpscr_u64 = u64(fpscr.Value());
|
||||
u64 t_u64 = cpsr.T() ? 1 : 0;
|
||||
u64 e_u64 = cpsr.E() ? 2 : 0;
|
||||
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue