backend/x64: Reduce conversions required for cpsr_nzcv
Guest programs read or write the NZCV flags directly far less often than we need to consult them for conditional jumps and similar operations. Therefore, we store the flags in cpsr_nzcv in an x64-friendly format, which reduces the amount of conditional-jump-related code we have to emit.
parent f4922a97f6
commit 8b3bc92bce
7 changed files with 213 additions and 189 deletions
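For reference, the x64-friendly layout introduced here (see src/backend/x64/nzcv_util.h below) keeps each ARM flag at the bit position it occupies in the low 16 bits of RFLAGS: a stored word can be moved into AX and the host flags materialised with sahf (SF/ZF/CF come from AH, i.e. bits 15, 14 and 8) plus add al, 0x7F (OF from bit 0). The helper below is illustrative only and is not part of the patch; the bit positions come from nzcv_util.h.

#include <cstdint>

// ARM:  N = bit 31, Z = bit 30, C = bit 29, V = bit 28
// x64:  N = bit 15 (SF via sahf), Z = bit 14 (ZF via sahf),
//       C = bit  8 (CF via sahf), V = bit  0 (OF via `add al, 0x7F`)
constexpr std::uint32_t x64_mask = 0xC101;

// Illustrative helper (not in the patch): spread an ARM NZCV nibble into the x64 layout.
constexpr std::uint32_t ArmToX64(std::uint32_t arm_nzcv) {
    const std::uint32_t nzcv = arm_nzcv >> 28;
    return ((nzcv & 0b1000) << 12)   // N -> bit 15
         | ((nzcv & 0b0100) << 12)   // Z -> bit 14
         | ((nzcv & 0b0010) << 7)    // C -> bit 8
         | ((nzcv & 0b0001) << 0);   // V -> bit 0
}
static_assert(ArmToX64(0xF0000000) == x64_mask);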
@@ -19,6 +19,7 @@
#include "backend/x64/block_of_code.h"
#include "backend/x64/devirtualize.h"
#include "backend/x64/emit_x64.h"
#include "backend/x64/nzcv_util.h"
#include "backend/x64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
@@ -307,6 +308,12 @@ void A32EmitX64::GenTerminalHandlers() {
    }
}

void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
    code.mov(code.byte[r15 + offsetof(A32JitState, check_bit)], to_store);
}

void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
    const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -384,6 +391,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
        const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();

        // Here we observe that cpsr_et and cpsr_ge are right next to each other in memory,
        // so we load them both at the same time with one 64-bit read. This allows us to
@@ -397,7 +405,11 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
        code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]);
        code.shl(tmp, 27);
        code.or_(result, tmp);
        code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
        code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
        code.mov(tmp, NZCV::x64_mask);
        code.pext(tmp2, tmp2, tmp);
        code.shl(tmp2, 28);
        code.or_(result, tmp2);
        code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]);

        ctx.reg_alloc.DefineValue(inst, result);
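In the BMI2 path above, pext gathers the four flag bits selected by NZCV::x64_mask into the low nibble, and a single shift then places them into CPSR bits 31..28. A minimal sketch of the same computation using the BMI2 intrinsic (illustrative only, not part of the patch; requires a BMI2-capable CPU and -mbmi2):

#include <immintrin.h>
#include <cstdint>

std::uint32_t GatherNzcvToTopNibble(std::uint32_t cpsr_nzcv_x64) {
    // pext packs the bits selected by the mask (bits 15, 14, 8, 0) into bits 3..0,
    // preserving their order: N Z C V.
    const std::uint32_t nzcv = _pext_u32(cpsr_nzcv_x64, 0xC101);
    return nzcv << 28;  // place NZCV back into CPSR bits 31..28
}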
@@ -430,7 +442,9 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {

    // cpsr_nzcv
    code.mov(tmp, cpsr);
    code.and_(tmp, 0xF0000000);
    code.shr(tmp, 28);
    code.mov(tmp2, NZCV::x64_mask);
    code.pdep(tmp, tmp, tmp2);
    code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp);

    // cpsr_jaifm
@@ -470,11 +484,21 @@ void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
    if (args[0].IsImmediate()) {
        const u32 imm = args[0].GetImmediateU32();

        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], u32(imm & 0xF0000000));
        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

        code.shr(a, 28);
        code.mov(b, NZCV::x64_mask);
        code.pdep(a, a, b);
        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
    } else {
        const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

        code.and_(a, 0xF0000000);
        code.shr(a, 28);
        code.imul(a, a, NZCV::to_x64_multiplier);
        code.and_(a, NZCV::x64_mask);
        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
    }
}
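The non-BMI2 fallback above relies on a multiply-and-mask trick instead of pdep: multiplying the 4-bit NZCV value by NZCV::to_x64_multiplier (0x1081) replicates it at bit offsets 12, 7 and 0, and masking with NZCV::x64_mask keeps exactly one copy of each flag in its x64 slot. A small compile-time check of the idea (illustrative, not part of the patch):

#include <cstdint>

constexpr std::uint32_t ToX64Fallback(std::uint32_t cpsr) {
    return (((cpsr & 0xF0000000) >> 28) * 0x1081) & 0xC101;
}

static_assert(ToX64Fallback(0x80000000) == 0x8000);  // N only
static_assert(ToX64Fallback(0x40000000) == 0x4000);  // Z only
static_assert(ToX64Fallback(0x20000000) == 0x0100);  // C only
static_assert(ToX64Fallback(0x10000000) == 0x0001);  // V only
static_assert(ToX64Fallback(0xF0000000) == 0xC101);  // all four flags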
@ -484,129 +508,90 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
|
|||
if (args[0].IsImmediate()) {
|
||||
const u32 imm = args[0].GetImmediateU32();
|
||||
|
||||
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], u32(imm & 0xF0000000));
|
||||
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
|
||||
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
|
||||
} else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
|
||||
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
code.shr(a, 28);
|
||||
code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
|
||||
code.mov(b, NZCV::x64_mask);
|
||||
code.pdep(a, a, b);
|
||||
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
|
||||
} else {
|
||||
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
|
||||
code.bt(a, 27);
|
||||
code.shr(a, 28);
|
||||
code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
|
||||
code.and_(a, 0xF0000000);
|
||||
code.imul(a, a, NZCV::to_x64_multiplier);
|
||||
code.and_(a, NZCV::x64_mask);
|
||||
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
|
||||
}
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
|
||||
code.shr(result, 31);
|
||||
if (flag_bit != 0) {
|
||||
code.shr(result, static_cast<int>(flag_bit));
|
||||
}
|
||||
code.and_(result, 1);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
static void EmitSetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
|
||||
const u32 flag_mask = 1u << flag_bit;
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
|
||||
} else {
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
}
|
||||
} else {
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
|
||||
if (flag_bit != 0) {
|
||||
code.shl(to_store, static_cast<int>(flag_bit));
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
|
||||
} else {
|
||||
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store.cvt8());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitGetFlag(code, ctx, inst, NZCV::x64_n_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32SetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
constexpr size_t flag_bit = 31;
|
||||
constexpr u32 flag_mask = 1u << flag_bit;
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
|
||||
} else {
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
}
|
||||
} else {
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
|
||||
code.shl(to_store, flag_bit);
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
|
||||
}
|
||||
EmitSetFlag(code, ctx, inst, NZCV::x64_n_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
|
||||
code.shr(result, 30);
|
||||
code.and_(result, 1);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
EmitGetFlag(code, ctx, inst, NZCV::x64_z_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32SetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
constexpr size_t flag_bit = 30;
|
||||
constexpr u32 flag_mask = 1u << flag_bit;
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
|
||||
} else {
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
}
|
||||
} else {
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
|
||||
code.shl(to_store, flag_bit);
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
|
||||
}
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
|
||||
code.mov(code.byte[r15 + offsetof(A32JitState, check_bit)], to_store);
|
||||
EmitSetFlag(code, ctx, inst, NZCV::x64_z_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
|
||||
code.shr(result, 29);
|
||||
code.and_(result, 1);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
EmitGetFlag(code, ctx, inst, NZCV::x64_c_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32SetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
constexpr size_t flag_bit = 29;
|
||||
constexpr u32 flag_mask = 1u << flag_bit;
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
|
||||
} else {
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
}
|
||||
} else {
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
|
||||
code.shl(to_store, flag_bit);
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
|
||||
}
|
||||
EmitSetFlag(code, ctx, inst, NZCV::x64_c_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
|
||||
code.shr(result, 28);
|
||||
code.and_(result, 1);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
EmitGetFlag(code, ctx, inst, NZCV::x64_v_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32SetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
constexpr size_t flag_bit = 28;
|
||||
constexpr u32 flag_mask = 1u << flag_bit;
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], flag_mask);
|
||||
} else {
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
}
|
||||
} else {
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
|
||||
code.shl(to_store, flag_bit);
|
||||
code.and_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], ~flag_mask);
|
||||
code.or_(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
|
||||
}
|
||||
EmitSetFlag(code, ctx, inst, NZCV::x64_v_flag_bit);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
|
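With the flags stored in the x64 layout, reading or writing a single flag is a plain shift/mask on that flag's bit position, which is why the per-flag getters and setters above collapse into the shared EmitGetFlag/EmitSetFlag helpers. A plain C++ reference of what the emitted code computes (illustrative, not part of the patch; bit positions from nzcv_util.h):

#include <cstddef>
#include <cstdint>

constexpr std::uint32_t GetFlag(std::uint32_t cpsr_nzcv, std::size_t flag_bit) {
    return (cpsr_nzcv >> flag_bit) & 1;  // e.g. flag_bit = 8 reads C
}

constexpr std::uint32_t SetFlag(std::uint32_t cpsr_nzcv, std::size_t flag_bit, bool value) {
    const std::uint32_t flag_mask = 1u << flag_bit;
    return value ? (cpsr_nzcv | flag_mask) : (cpsr_nzcv & ~flag_mask);
}

static_assert(GetFlag(0xC101, 8) == 1);         // C is set in 0xC101
static_assert(SetFlag(0, 15, true) == 0x8000);  // setting N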
@@ -5,6 +5,7 @@

#include "backend/x64/a32_jitstate.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/nzcv_util.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
@@ -45,14 +46,14 @@ namespace Dynarmic::Backend::X64 {
 */

u32 A32JitState::Cpsr() const {
    DEBUG_ASSERT((cpsr_nzcv & ~0xF0000000) == 0);
    DEBUG_ASSERT((cpsr_nzcv & ~NZCV::x64_mask) == 0);
    DEBUG_ASSERT((cpsr_q & ~1) == 0);
    DEBUG_ASSERT((cpsr_jaifm & ~0x010001DF) == 0);

    u32 cpsr = 0;

    // NZCV flags
    cpsr |= cpsr_nzcv;
    cpsr |= NZCV::FromX64(cpsr_nzcv);
    // Q flag
    cpsr |= cpsr_q ? 1 << 27 : 0;
    // GE flags
@@ -74,7 +75,7 @@ u32 A32JitState::Cpsr() const {

void A32JitState::SetCpsr(u32 cpsr) {
    // NZCV flags
    cpsr_nzcv = cpsr & 0xF0000000;
    cpsr_nzcv = NZCV::ToX64(cpsr);
    // Q flag
    cpsr_q = Common::Bit<27>(cpsr) ? 1 : 0;
    // GE flags
@@ -15,6 +15,7 @@
#include "backend/x64/block_of_code.h"
#include "backend/x64/devirtualize.h"
#include "backend/x64/emit_x64.h"
#include "backend/x64/nzcv_util.h"
#include "backend/x64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
@@ -371,7 +372,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
    code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
    code.shr(result, 29);
    code.shr(result, NZCV::x64_c_flag_bit);
    code.and_(result, 1);
    ctx.reg_alloc.DefineValue(inst, result);
}
@@ -380,6 +381,9 @@ void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
    const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();

    code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
    code.and_(nzcv_raw, NZCV::x64_mask);
    code.imul(nzcv_raw, nzcv_raw, NZCV::from_x64_multiplier);
    code.and_(nzcv_raw, NZCV::arm_mask);
    ctx.reg_alloc.DefineValue(inst, nzcv_raw);
}

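EmitA64GetNZCVRaw above uses the same multiplier trick in the opposite direction: masking with NZCV::x64_mask and multiplying by NZCV::from_x64_multiplier (0x1021'0000) moves each flag to its CPSR position, exactly as NZCV::FromX64 does. A small compile-time check (illustrative, not part of the patch):

#include <cstdint>

constexpr std::uint32_t FromX64Fallback(std::uint32_t x64_flags) {
    return ((x64_flags & 0xC101) * 0x10210000) & 0xF0000000;
}

static_assert(FromX64Fallback(0x8000) == 0x80000000);  // N
static_assert(FromX64Fallback(0x4000) == 0x40000000);  // Z
static_assert(FromX64Fallback(0x0100) == 0x20000000);  // C
static_assert(FromX64Fallback(0x0001) == 0x10000000);  // V
static_assert(FromX64Fallback(0xC101) == 0xF0000000);  // all four flags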
@@ -387,17 +391,15 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

    code.and_(nzcv_raw, 0xF0000000);
    code.shr(nzcv_raw, 28);
    code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier);
    code.and_(nzcv_raw, NZCV::x64_mask);
    code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
}

void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    code.and_(to_store, 0b11000001'00000001);
    code.imul(to_store, to_store, 0b00010000'00100001);
    code.shl(to_store, 16);
    code.and_(to_store, 0xF0000000);
    code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store);
}

@@ -9,6 +9,7 @@

#include <xbyak.h>

#include "backend/x64/nzcv_util.h"
#include "common/common_types.h"
#include "frontend/A64/location_descriptor.h"

@@ -33,10 +34,10 @@ struct A64JitState {
    u32 cpsr_nzcv = 0;

    u32 GetPstate() const {
        return cpsr_nzcv;
        return NZCV::FromX64(cpsr_nzcv);
    }
    void SetPstate(u32 new_pstate) {
        cpsr_nzcv = new_pstate & 0xF0000000;
        cpsr_nzcv = NZCV::ToX64(new_pstate);
    }

    alignas(16) std::array<u64, 64> vec{}; // Extension registers.
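Since cpsr_nzcv now holds the x64 layout, GetPstate and SetPstate convert at the boundary, so callers still see ordinary PSTATE values. A usage sketch (illustrative, not part of the patch; assumes the A64JitState definition and the u32 typedef from the hunks above):

A64JitState jit_state;
jit_state.SetPstate(0xB0000000);           // N, C and V set
// jit_state.cpsr_nzcv now holds NZCV::ToX64(0xB0000000) == 0x8101
const u32 pstate = jit_state.GetPstate();  // == 0xB0000000 again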
@@ -174,115 +174,82 @@ void EmitX64::EmitAddCycles(size_t cycles) {
}

Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
    Xbyak::Label label;
    Xbyak::Label pass;

    const Xbyak::Reg32 cpsr = eax;
    code.mov(cpsr, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
    code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);

    constexpr size_t n_shift = 31;
    constexpr size_t z_shift = 30;
    constexpr size_t c_shift = 29;
    constexpr size_t v_shift = 28;
    constexpr u32 n_mask = 1u << n_shift;
    constexpr u32 z_mask = 1u << z_shift;
    constexpr u32 c_mask = 1u << c_shift;
    constexpr u32 v_mask = 1u << v_shift;
    // sahf restores SF, ZF, CF
    // add al, 0x7F restores OF

    switch (cond) {
    case IR::Cond::EQ: //z
        code.test(cpsr, z_mask);
        code.jnz(label);
        code.sahf();
        code.jz(pass);
        break;
    case IR::Cond::NE: //!z
        code.test(cpsr, z_mask);
        code.jz(label);
        code.sahf();
        code.jnz(pass);
        break;
    case IR::Cond::CS: //c
        code.test(cpsr, c_mask);
        code.jnz(label);
        code.sahf();
        code.jc(pass);
        break;
    case IR::Cond::CC: //!c
        code.test(cpsr, c_mask);
        code.jz(label);
        code.sahf();
        code.jnc(pass);
        break;
    case IR::Cond::MI: //n
        code.test(cpsr, n_mask);
        code.jnz(label);
        code.sahf();
        code.js(pass);
        break;
    case IR::Cond::PL: //!n
        code.test(cpsr, n_mask);
        code.jz(label);
        code.sahf();
        code.jns(pass);
        break;
    case IR::Cond::VS: //v
        code.test(cpsr, v_mask);
        code.jnz(label);
        code.add(al, 0x7F);
        code.jo(pass);
        break;
    case IR::Cond::VC: //!v
        code.test(cpsr, v_mask);
        code.jz(label);
        code.add(al, 0x7F);
        code.jno(pass);
        break;
    case IR::Cond::HI: { //c & !z
        code.and_(cpsr, z_mask | c_mask);
        code.cmp(cpsr, c_mask);
        code.je(label);
    case IR::Cond::HI: //c & !z
        code.sahf();
        code.cmc();
        code.ja(pass);
        break;
    }
    case IR::Cond::LS: { //!c | z
        code.and_(cpsr, z_mask | c_mask);
        code.cmp(cpsr, c_mask);
        code.jne(label);
    case IR::Cond::LS: //!c | z
        code.sahf();
        code.cmc();
        code.jna(pass);
        break;
    }
    case IR::Cond::GE: { // n == v
        code.and_(cpsr, n_mask | v_mask);
        code.jz(label);
        code.cmp(cpsr, n_mask | v_mask);
        code.je(label);
    case IR::Cond::GE: // n == v
        code.add(al, 0x7F);
        code.sahf();
        code.jge(pass);
        break;
    }
    case IR::Cond::LT: { // n != v
        Xbyak::Label fail;
        code.and_(cpsr, n_mask | v_mask);
        code.jz(fail);
        code.cmp(cpsr, n_mask | v_mask);
        code.jne(label);
        code.L(fail);
    case IR::Cond::LT: // n != v
        code.add(al, 0x7F);
        code.sahf();
        code.jl(pass);
        break;
    }
    case IR::Cond::GT: { // !z & (n == v)
        const Xbyak::Reg32 tmp1 = ebx;
        const Xbyak::Reg32 tmp2 = esi;
        code.mov(tmp1, cpsr);
        code.mov(tmp2, cpsr);
        code.shr(tmp1, n_shift);
        code.shr(tmp2, v_shift);
        code.shr(cpsr, z_shift);
        code.xor_(tmp1, tmp2);
        code.or_(tmp1, cpsr);
        code.test(tmp1, 1);
        code.jz(label);
    case IR::Cond::GT: // !z & (n == v)
        code.add(al, 0x7F);
        code.sahf();
        code.jg(pass);
        break;
    }
    case IR::Cond::LE: { // z | (n != v)
        const Xbyak::Reg32 tmp1 = ebx;
        const Xbyak::Reg32 tmp2 = esi;
        code.mov(tmp1, cpsr);
        code.mov(tmp2, cpsr);
        code.shr(tmp1, n_shift);
        code.shr(tmp2, v_shift);
        code.shr(cpsr, z_shift);
        code.xor_(tmp1, tmp2);
        code.or_(tmp1, cpsr);
        code.test(tmp1, 1);
        code.jnz(label);
    case IR::Cond::LE: // z | (n != v)
        code.add(al, 0x7F);
        code.sahf();
        code.jle(pass);
        break;
    }
    default:
        ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
        break;
    }

    return label;
    return pass;
}

EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
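This hunk is where the new format pays off: EmitCond no longer tests and recombines individual flag bits per condition. It loads the stored word into eax and lets sahf (for SF/ZF/CF) and add al, 0x7F (for OF) materialise the host flags, so the native condition codes apply directly. A plain C++ reference of what, for example, the Cond::GE sequence ends up testing (illustrative, not part of the patch):

#include <cstdint>

// mov eax, [cpsr_nzcv]; add al, 0x7F; sahf; jge
// After sahf, SF = stored N (bit 15); after add al, 0x7F, OF = stored V (bit 0).
// jge is taken exactly when SF == OF, i.e. when N == V.
bool CondGePasses(std::uint32_t cpsr_nzcv_x64) {
    const bool n = (cpsr_nzcv_x64 >> 15) & 1;  // becomes SF via sahf
    const bool v = (cpsr_nzcv_x64 >> 0) & 1;   // becomes OF via add al, 0x7F
    return n == v;                             // jge: SF == OF
}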
@@ -126,56 +126,71 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
    const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize);

    code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
    // TODO: Flag optimization
    code.shr(nzcv, 28);
    code.imul(nzcv, nzcv, 0b00010000'10000001);
    code.and_(nzcv.cvt8(), 1);
    code.add(nzcv.cvt8(), 0x7F); // restore OF
    code.sahf(); // restore SF, ZF, CF

    // sahf restores SF, ZF, CF
    // add al, 0x7F restores OF

    switch (args[0].GetImmediateCond()) {
    case IR::Cond::EQ: //z
        code.sahf();
        code.cmovz(else_, then_);
        break;
    case IR::Cond::NE: //!z
        code.sahf();
        code.cmovnz(else_, then_);
        break;
    case IR::Cond::CS: //c
        code.sahf();
        code.cmovc(else_, then_);
        break;
    case IR::Cond::CC: //!c
        code.sahf();
        code.cmovnc(else_, then_);
        break;
    case IR::Cond::MI: //n
        code.sahf();
        code.cmovs(else_, then_);
        break;
    case IR::Cond::PL: //!n
        code.sahf();
        code.cmovns(else_, then_);
        break;
    case IR::Cond::VS: //v
        code.add(nzcv.cvt8(), 0x7F);
        code.cmovo(else_, then_);
        break;
    case IR::Cond::VC: //!v
        code.add(nzcv.cvt8(), 0x7F);
        code.cmovno(else_, then_);
        break;
    case IR::Cond::HI: //c & !z
        code.sahf();
        code.cmc();
        code.cmova(else_, then_);
        break;
    case IR::Cond::LS: //!c | z
        code.sahf();
        code.cmc();
        code.cmovna(else_, then_);
        break;
    case IR::Cond::GE: // n == v
        code.add(nzcv.cvt8(), 0x7F);
        code.sahf();
        code.cmovge(else_, then_);
        break;
    case IR::Cond::LT: // n != v
        code.add(nzcv.cvt8(), 0x7F);
        code.sahf();
        code.cmovl(else_, then_);
        break;
    case IR::Cond::GT: // !z & (n == v)
        code.add(nzcv.cvt8(), 0x7F);
        code.sahf();
        code.cmovg(else_, then_);
        break;
    case IR::Cond::LE: // z | (n != v)
        code.add(nzcv.cvt8(), 0x7F);
        code.sahf();
        code.cmovle(else_, then_);
        break;
    case IR::Cond::AL:
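EmitConditionalSelect benefits in the same way: the old shr/imul conversion preamble (the removed lines marked "TODO: Flag optimization") is no longer needed, and each case restores only the host flags it requires before the matching cmovcc. One subtlety visible in the HI and LS cases above: ARM HI means C set and Z clear, while the x86 "above" condition tests CF clear and ZF clear, hence the cmc after sahf. A plain C++ reference for the HI case (illustrative, not part of the patch):

#include <cstdint>

std::uint64_t SelectHi(std::uint32_t cpsr_nzcv_x64, std::uint64_t then_, std::uint64_t else_) {
    const bool z = (cpsr_nzcv_x64 >> 14) & 1;  // ZF after sahf
    const bool c = (cpsr_nzcv_x64 >> 8) & 1;   // CF after sahf, complemented by cmc
    return (c && !z) ? then_ : else_;          // cmova picks then_ when "above"
}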
src/backend/x64/nzcv_util.h (new file, 53 lines)

@@ -0,0 +1,53 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include "common/common_types.h"
#include "common/bit_util.h"

namespace Dynarmic::Backend::X64::NZCV {

constexpr u32 arm_mask = 0xF000'0000;
constexpr u32 x64_mask = 0xC101;

constexpr size_t x64_n_flag_bit = 15;
constexpr size_t x64_z_flag_bit = 14;
constexpr size_t x64_c_flag_bit = 8;
constexpr size_t x64_v_flag_bit = 0;

/// This is a constant used to create the x64 flags format from the ARM format.
/// NZCV * multiplier: NZCV0NZCV000NZCV
/// x64_flags format: NZ-----C-------V
constexpr u32 to_x64_multiplier = 0x1081;

/// This is a constant used to create the ARM format from the x64 flags format.
constexpr u32 from_x64_multiplier = 0x1021'0000;

inline u32 ToX64(u32 nzcv) {
    /* Naive implementation:
    u32 x64_flags = 0;
    x64_flags |= Common::Bit<31>(cpsr) ? 1 << 15 : 0;
    x64_flags |= Common::Bit<30>(cpsr) ? 1 << 14 : 0;
    x64_flags |= Common::Bit<29>(cpsr) ? 1 << 8 : 0;
    x64_flags |= Common::Bit<28>(cpsr) ? 1 : 0;
    return x64_flags;
    */
    return ((nzcv >> 28) * to_x64_multiplier) & x64_mask;
}

inline u32 FromX64(u32 x64_flags) {
    /* Naive implementation:
    u32 nzcv = 0;
    nzcv |= Common::Bit<15>(x64_flags) ? 1 << 31 : 0;
    nzcv |= Common::Bit<14>(x64_flags) ? 1 << 30 : 0;
    nzcv |= Common::Bit<8>(x64_flags) ? 1 << 29 : 0;
    nzcv |= Common::Bit<0>(x64_flags) ? 1 << 28 : 0;
    return nzcv;
    */
    return ((x64_flags & x64_mask) * from_x64_multiplier) & arm_mask;
}

} // namespace Dynarmic::Backend::X64::NZCV
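As a final sanity check on the helpers in nzcv_util.h, the two conversions are inverses on the bits each format uses. A compile-time sketch (illustrative, not part of the patch), using standalone constexpr copies of the helpers:

#include <cstdint>

constexpr std::uint32_t ToX64(std::uint32_t nzcv)  { return ((nzcv >> 28) * 0x1081) & 0xC101; }
constexpr std::uint32_t FromX64(std::uint32_t x64) { return ((x64 & 0xC101) * 0x1021'0000) & 0xF000'0000; }

static_assert(FromX64(ToX64(0xA0000000)) == 0xA0000000);  // N and C
static_assert(ToX64(FromX64(0x4001)) == 0x4001);          // Z and V
static_assert(FromX64(ToX64(0xF0000000)) == 0xF0000000);  // all four flags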