VFP: Implement {Get,Set}ExtendedRegister{32,64}

This commit is contained in:
MerryMage 2016-08-05 18:54:19 +01:00
parent d31bbd6d14
commit 640ce48baa
15 changed files with 262 additions and 16 deletions

View file

@ -3,6 +3,7 @@ include_directories(.)
set(SRCS
backend_x64/emit_x64.cpp
backend_x64/interface_x64.cpp
backend_x64/jitstate.cpp
backend_x64/reg_alloc.cpp
backend_x64/routines.cpp
common/memory_util.cpp

View file

@ -26,6 +26,18 @@ static OpArg MJitStateReg(Arm::Reg reg) {
return MDisp(R15, offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg));
}
/**
 * Builds the memory operand that addresses a guest extension register inside
 * the JitState structure (addressed relative to R15).
 *
 * Single-precision registers S0-S31 are addressed as consecutive u32 slots of
 * JitState::ExtReg; double-precision registers D0-D31 are addressed as
 * consecutive u64 slots of the same array, so D<n> overlays S<2n> and S<2n+1>.
 *
 * @param reg Extension register to address. Must lie in S0-S31 or D0-D31.
 * @return Displacement operand relative to R15 for the register's storage.
 */
static OpArg MJitStateExtReg(Arm::ExtReg reg) {
    if (reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31) {
        const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(Arm::ExtReg::S0);
        return MDisp(R15, static_cast<int>(offsetof(JitState, ExtReg) + sizeof(u32) * index));
    }

    // Anything that is not an S register has to be a D register. Asserting the
    // range here (instead of asserting `false` at the end) guarantees that
    // every path through this non-void function ends in a return statement.
    ASSERT_MSG(reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31, "Should never happen.");
    const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(Arm::ExtReg::D0);
    return MDisp(R15, static_cast<int>(offsetof(JitState, ExtReg) + sizeof(u64) * index));
}
/// Memory operand addressing the Cpsr field of the JitState structure, relative to R15.
static OpArg MJitStateCpsr() {
    const auto cpsr_offset = offsetof(JitState, Cpsr);
    return MDisp(R15, cpsr_offset);
}
@ -104,6 +116,20 @@ void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) {
code->MOV(32, R(result), MJitStateReg(reg));
}
void EmitX64::EmitGetExtendedRegister32(IR::Block& block, IR::Inst* inst) {
Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
ASSERT(reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31);
X64Reg result = reg_alloc.DefRegister(inst, any_xmm);
code->MOVSS(result, MJitStateExtReg(reg));
}
/**
 * Emits the load of a double-precision guest extension register.
 *
 * Argument 0 of the instruction names the register (must be D0-D31); the
 * value is moved with MOVSD from its JitState slot into a newly defined XMM
 * register that holds the result of `inst`.
 */
void EmitX64::EmitGetExtendedRegister64(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::ExtReg::D0 <= ext_reg && ext_reg <= Arm::ExtReg::D31);

    const X64Reg dest_xmm = reg_alloc.DefRegister(inst, any_xmm);
    code->MOVSD(dest_xmm, MJitStateExtReg(ext_reg));
}
void EmitX64::EmitSetRegister(IR::Block&, IR::Inst* inst) {
Arm::Reg reg = inst->GetArg(0).GetRegRef();
IR::Value arg = inst->GetArg(1);
@ -115,6 +141,20 @@ void EmitX64::EmitSetRegister(IR::Block&, IR::Inst* inst) {
}
}
/**
 * Emits the store of a single-precision value into a guest extension register.
 *
 * Argument 0 names the destination register (must be S0-S31); argument 1 is
 * the value, which is placed in an XMM register and written to the register's
 * JitState slot with MOVSS.
 */
void EmitX64::EmitSetExtendedRegister32(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg dest = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::ExtReg::S0 <= dest && dest <= Arm::ExtReg::S31);

    const X64Reg value_xmm = reg_alloc.UseRegister(inst->GetArg(1), any_xmm);
    code->MOVSS(MJitStateExtReg(dest), value_xmm);
}
/**
 * Emits the store of a double-precision value into a guest extension register.
 *
 * Argument 0 names the destination register (must be D0-D31); argument 1 is
 * the value, which is placed in an XMM register and written to the register's
 * JitState slot with MOVSD.
 */
void EmitX64::EmitSetExtendedRegister64(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg dest = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::ExtReg::D0 <= dest && dest <= Arm::ExtReg::D31);

    const X64Reg value_xmm = reg_alloc.UseRegister(inst->GetArg(1), any_xmm);
    code->MOVSD(MJitStateExtReg(dest), value_xmm);
}
void EmitX64::EmitGetNFlag(IR::Block&, IR::Inst* inst) {
X64Reg result = reg_alloc.DefRegister(inst, any_gpr);
code->MOV(32, R(result), MJitStateCpsr());
@ -1128,7 +1168,7 @@ void EmitX64::EmitCondPrelude(Arm::Cond cond,
CCFlags cc = EmitCond(code, cond);
// TODO: Improve, maybe.
auto fixup = code->J_CC(cc);
auto fixup = code->J_CC(cc, true);
EmitAddCycles(1); // TODO: Proper cycle count
EmitTerminalLinkBlock(IR::Term::LinkBlock{cond_failed.get()}, initial_location);
code->SetJumpTarget(fixup);
@ -1205,7 +1245,7 @@ void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, Arm::LocationDescript
void EmitX64::EmitTerminalIf(IR::Term::If terminal, Arm::LocationDescriptor initial_location) {
CCFlags cc = EmitCond(code, terminal.if_);
auto fixup = code->J_CC(cc);
auto fixup = code->J_CC(cc, true);
EmitTerminal(terminal.else_, initial_location);
code->SetJumpTarget(fixup);
EmitTerminal(terminal.then_, initial_location);

View file

@ -48,7 +48,7 @@ struct Jit::Impl {
bool TFlag = Common::Bit<5>(jit_state.Cpsr);
bool EFlag = Common::Bit<9>(jit_state.Cpsr);
Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, jit_state.Fpscr};
Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, jit_state.guest_FPSCR_flags};
CodePtr code_ptr = GetBasicBlock(descriptor)->code_ptr;
return routines.RunCode(&jit_state, code_ptr, cycle_count);
@ -150,13 +150,30 @@ std::array<u32, 16> Jit::Regs() const {
return impl->jit_state.Reg;
}
/// Mutable access to the 64-word extension register file held in the JIT state.
std::array<u32, 64>& Jit::ExtRegs() {
    auto& state = impl->jit_state;
    return state.ExtReg;
}

/// Snapshot (by value) of the 64-word extension register file held in the JIT state.
std::array<u32, 64> Jit::ExtRegs() const {
    const auto& state = impl->jit_state;
    return state.ExtReg;
}
/// Mutable reference to the CPSR word held in the JIT state.
u32& Jit::Cpsr() {
    auto& state = impl->jit_state;
    return state.Cpsr;
}

/// Current value of the CPSR word held in the JIT state.
u32 Jit::Cpsr() const {
    const auto& state = impl->jit_state;
    return state.Cpsr;
}
/// Returns the guest FPSCR as reconstructed by JitState::Fpscr().
u32 Jit::Fpscr() const {
    const u32 fpscr = impl->jit_state.Fpscr();
    return fpscr;
}
/// Forwards a new guest FPSCR value to JitState::SetFpscr().
// NOTE(review): this setter is declared const even though it changes the JIT
// state reached through impl; the qualifier is kept so the definition keeps
// matching its declaration.
void Jit::SetFpscr(u32 value) const {
    impl->jit_state.SetFpscr(value);
}
std::string Jit::Disassemble(Arm::LocationDescriptor descriptor) {
return impl->Disassemble(descriptor);
}

View file

@ -0,0 +1,113 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend_x64/jitstate.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
namespace Dynarmic {
namespace BackendX64 {
/**
* Comparing MXCSR and FPSCR
* =========================
*
* SSE MXCSR exception flags
* -------------------------
* PE bit 5 Precision Flag
* UE bit 4 Underflow Flag
* OE bit 3 Overflow Flag
* ZE bit 2 Divide By Zero Flag
* DE bit 1 Denormal Flag
* IE bit 0 Invalid Operation Flag
*
* VFP FPSCR cumulative exception bits
* -----------------------------------
* IDC bit 7 Input Denormal cumulative exception bit
* IXC bit 4 Inexact cumulative exception bit
* UFC bit 3 Underflow cumulative exception bit
* OFC bit 2 Overflow cumulative exception bit
* DZC bit 1 Division by Zero cumulative exception bit
* IOC bit 0 Invalid Operation cumulative exception bit
*
* SSE MXCSR exception masks
* -------------------------
* PM bit 12 Precision Mask
* UM bit 11 Underflow Mask
* OM bit 10 Overflow Mask
* ZM bit 9 Divide By Zero Mask
* DM bit 8 Denormal Mask
* IM bit 7 Invalid Operation Mask
*
* VFP FPSCR exception trap enables
* --------------------------------
* IDE bit 15 Input Denormal exception trap enable
* IXE bit 12 Inexact exception trap enable
* UFE bit 11 Underflow exception trap enable
* OFE bit 10 Overflow exception trap enable
* DZE bit 9 Division by Zero exception trap enable
* IOE bit 8 Invalid Operation exception trap enable
*
* SSE MXCSR mode bits
* -------------------
* FZ bit 15 Flush To Zero
* DAZ bit 6 Denormals Are Zero
* RN bits 13-14 Round to {0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero}
*
* VFP FPSCR mode bits
* -------------------
* DN bit 25 Default NaN
* FZ bit 24 Flush to Zero
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
* Stride bits 20-21 Vector stride
* Len bits 16-18 Vector length
*/
// Selects the guest FPSCR bits that SetFpscr keeps in guest_FPSCR_flags.
// The digit groups of the literal, from bit 31 down to bit 0, correspond to:
// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
// Groups written as zeroes (QC/AHP, SBZP, cumulative bits) are not kept in guest_FPSCR_flags.
constexpr u32 FPSCR_MASK = 0b1111'00'111111'0'111'10011111'00000000;
/**
 * Reconstructs the guest-visible FPSCR value.
 *
 * Starts from the bits saved in guest_FPSCR_flags and merges in the
 * cumulative exception flags currently recorded in guest_MXCSR, remapped from
 * the MXCSR bit layout to the FPSCR bit layout.
 *
 * @return The combined FPSCR value.
 */
u32 JitState::Fpscr() const {
    ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0);

    constexpr u32 idc_bit = 1u << 7;

    u32 result = guest_FPSCR_flags;
    result |= guest_MXCSR & 0x01;          // IOC = IE
    result |= (guest_MXCSR & 0x02) << 6;   // IDC = DE
    result |= (guest_MXCSR & 0x3C) >> 1;   // IXC, UFC, OFC, DZC = PE, UE, OE, ZE

    if (!Common::Bit<24>(result)) {
        // ARM only sets IDC if FTZ == 1, so outside flush-to-zero mode report
        // the IDC bit from the last value written through SetFpscr instead.
        result = (result & ~idc_bit) | (old_FPSCR & idc_bit);
    }

    return result;
}
/**
 * Installs a new guest FPSCR value.
 *
 * The raw value is remembered in old_FPSCR, the bits selected by FPSCR_MASK
 * are kept in guest_FPSCR_flags, and guest_MXCSR is rebuilt from scratch so
 * that it mirrors the FPSCR contents:
 *   - cumulative exception bits map onto the MXCSR exception flags,
 *   - trap enables map (inverted) onto the MXCSR exception masks,
 *   - RMode maps onto the MXCSR rounding field,
 *   - FZ enables both MXCSR flush-to-zero and denormals-are-zero.
 *
 * @param FPSCR New guest FPSCR value.
 */
void JitState::SetFpscr(u32 FPSCR) {
    old_FPSCR = FPSCR;
    guest_FPSCR_flags = FPSCR & FPSCR_MASK;

    guest_MXCSR = 0;

    // Cumulative exception bits -> exception flags.
    guest_MXCSR |= ( FPSCR      ) & 0b0000000000001; // IE = IOC
    guest_MXCSR |= ( FPSCR >> 6) & 0b0000000000010; // DE = IDC
    guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC

    // Trap enables -> exception masks (a mask bit is the inverse of an enable bit).
    guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE
    guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE
    guest_MXCSR |= (~FPSCR      ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE

    // Rounding mode. The two encodings differ only in that the codes for
    // "towards positive" and "towards negative" are exchanged:
    //   FPSCR RMode: 0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero
    //   MXCSR RN:    0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero
    // Swapping the two bits of the field performs exactly that exchange.
    const u32 rmode = (FPSCR >> 22) & 0b11;
    const u32 rn = ((rmode & 0b01) << 1) | (rmode >> 1);
    guest_MXCSR |= rn << 13;

    if (Common::Bit<24>(FPSCR)) {
        // VFP Flush to Zero
        guest_MXCSR |= (1 << 15); // SSE Flush to Zero
        guest_MXCSR |= (1 << 6);  // SSE Denormals are Zero
    }
}
} // namespace BackendX64
} // namespace Dynarmic

View file

@ -3,6 +3,7 @@
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
@ -19,13 +20,20 @@ struct JitState {
std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented.
u32 Fpscr = 0;
alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.
std::array<u64, SpillCount> Spill{}; // Spill.
// For internal use (See: Routines::RunCode)
u32 guest_MXCSR = 0;
u32 save_host_MXCSR = 0;
u64 save_host_RSP = 0;
s64 cycles_remaining = 0;
u32 guest_FPSCR_flags = 0;
u32 old_FPSCR = 0;
u32 Fpscr() const;
void SetFpscr(u32 FPSCR);
};
using CodePtr = const u8*;

View file

@ -111,9 +111,14 @@ Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, Hos
}
}
Gen::X64Reg use_reg = UseRegister(use_inst, any_gpr);
bool is_floating_point = use_inst->GetType() == IR::Type::F32 || use_inst->GetType() == IR::Type::F64;
Gen::X64Reg use_reg = UseRegister(use_inst, is_floating_point ? any_xmm : any_gpr);
Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations);
code->MOV(64, Gen::R(def_reg), Gen::R(use_reg));
if (is_floating_point) {
code->MOVAPD(def_reg, Gen::R(use_reg));
} else {
code->MOV(64, Gen::R(def_reg), Gen::R(use_reg));
}
return def_reg;
}

View file

@ -41,12 +41,17 @@ void Routines::GenRunCode() {
MOV(64, R(R15), R(ABI_PARAM1));
MOV(64, MDisp(R15, offsetof(JitState, save_host_RSP)), R(RSP));
STMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
LDMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));
JMPptr(R(ABI_PARAM2));
}
// Emits, through the given emitter, the sequence that leaves JIT-generated code.
// It is the counterpart of the entry sequence in GenRunCode, which saves the host
// RSP and MXCSR into the JitState addressed by R15 and loads the guest MXCSR.
void Routines::GenReturnFromRunCode(XEmitter* code) const {
// Write the current MXCSR back to JitState::guest_MXCSR.
code->STMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));
// Reload the MXCSR value that was saved on entry.
code->LDMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
// Reload the stack pointer that was saved on entry.
code->MOV(64, R(RSP), MDisp(R15, offsetof(JitState, save_host_RSP)));
// Pop the callee-saved registers; presumably mirrors a push in the part of
// GenRunCode that precedes the visible lines — TODO confirm.
code->ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
code->RET();
}

View file

@ -51,6 +51,17 @@ inline const char* RegToString(Reg reg) {
return reg_strs.at(static_cast<size_t>(reg));
}
/**
 * Guest extension registers.
 *
 * The single-precision names S0-S31 come first, followed directly by the
 * double-precision names D0-D31. The declaration order is significant: users
 * of this enum classify a register with range comparisons against S0/S31 and
 * D0/D31 and compute indices relative to S0 or D0.
 */
enum class ExtReg {
    // Single-precision registers.
    S0,  S1,  S2,  S3,
    S4,  S5,  S6,  S7,
    S8,  S9,  S10, S11,
    S12, S13, S14, S15,
    S16, S17, S18, S19,
    S20, S21, S22, S23,
    S24, S25, S26, S27,
    S28, S29, S30, S31,

    // Double-precision registers.
    D0,  D1,  D2,  D3,
    D4,  D5,  D6,  D7,
    D8,  D9,  D10, D11,
    D12, D13, D14, D15,
    D16, D17, D18, D19,
    D20, D21, D22, D23,
    D24, D25, D26, D27,
    D28, D29, D30, D31,
};
using Imm3 = u32;
using Imm4 = u32;
using Imm5 = u32;

View file

@ -78,6 +78,11 @@ Arm::Reg Value::GetRegRef() const {
return inner.imm_regref;
}
/// Returns the extension register this value refers to.
/// Debug builds assert that the value is tagged as Type::ExtRegRef.
Arm::ExtReg Value::GetExtRegRef() const {
    DEBUG_ASSERT(Type::ExtRegRef == type);

    return inner.imm_extregref;
}
Inst* Value::GetInst() const {
DEBUG_ASSERT(type == Type::Opaque);
return inner.inst;

View file

@ -33,16 +33,17 @@ namespace IR {
// A basic block is represented as an IR::Block.
enum class Type {
Void = 1 << 0,
RegRef = 1 << 1,
Opaque = 1 << 2,
U1 = 1 << 3,
U8 = 1 << 4,
U16 = 1 << 5,
U32 = 1 << 6,
U64 = 1 << 7,
F32 = 1 << 8,
F64 = 1 << 9,
Void = 1 << 0,
RegRef = 1 << 1,
ExtRegRef = 1 << 2,
Opaque = 1 << 3,
U1 = 1 << 4,
U8 = 1 << 5,
U16 = 1 << 6,
U32 = 1 << 7,
U64 = 1 << 8,
F32 = 1 << 9,
F64 = 1 << 10,
};
Type GetTypeOf(Opcode op);
@ -72,6 +73,10 @@ public:
inner.imm_regref = value;
}
/// Constructs a value that refers to an extension register.
/// The value is tagged Type::ExtRegRef and the register is kept in the inner union.
explicit Value(Arm::ExtReg value) : type(Type::ExtRegRef) {
inner.imm_extregref = value;
}
/// Constructs an immediate one-bit value.
/// The value is tagged Type::U1 and the boolean is kept in the inner union.
explicit Value(bool value) : type(Type::U1) {
inner.imm_u1 = value;
}
@ -90,6 +95,7 @@ public:
Inst* GetInst() const;
Arm::Reg GetRegRef() const;
Arm::ExtReg GetExtRegRef() const;
bool GetU1() const;
u8 GetU8() const;
u32 GetU32() const;
@ -100,6 +106,7 @@ private:
union {
Inst* inst; // type == Type::Opaque
Arm::Reg imm_regref;
Arm::ExtReg imm_extregref;
bool imm_u1;
u8 imm_u8;
u32 imm_u32;

View file

@ -43,11 +43,31 @@ IR::Value IREmitter::GetRegister(Reg reg) {
return Inst(IR::Opcode::GetRegister, { IR::Value(reg) });
}
/**
 * Emits an IR instruction that reads a guest extension register.
 *
 * S0-S31 produce a GetExtendedRegister32 instruction, D0-D31 produce a
 * GetExtendedRegister64 instruction.
 *
 * @param reg Register to read. Must lie in S0-S31 or D0-D31.
 * @return The value produced by the emitted instruction.
 */
IR::Value IREmitter::GetExtendedRegister(ExtReg reg) {
    if (reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31) {
        return Inst(IR::Opcode::GetExtendedRegister32, {IR::Value(reg)});
    }

    // Anything that is not an S register has to be a D register. Asserting the
    // range here guarantees that every path through this non-void function
    // ends in a return statement.
    ASSERT_MSG(reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31, "Invalid reg.");
    return Inst(IR::Opcode::GetExtendedRegister64, {IR::Value(reg)});
}
void IREmitter::SetRegister(const Reg reg, const IR::Value& value) {
ASSERT(reg != Reg::PC);
Inst(IR::Opcode::SetRegister, { IR::Value(reg), value });
}
/**
 * Emits an IR instruction that writes a guest extension register.
 *
 * S0-S31 produce a SetExtendedRegister32 instruction, D0-D31 produce a
 * SetExtendedRegister64 instruction; any other register trips an assertion.
 *
 * @param reg   Register to write.
 * @param value Value to store into the register.
 */
void IREmitter::SetExtendedRegister(const ExtReg reg, const IR::Value& value) {
    const bool is_single = Arm::ExtReg::S0 <= reg && reg <= Arm::ExtReg::S31;
    const bool is_double = Arm::ExtReg::D0 <= reg && reg <= Arm::ExtReg::D31;

    if (!is_single && !is_double) {
        ASSERT_MSG(false, "Invalid reg.");
        return;
    }

    const IR::Opcode opcode = is_single ? IR::Opcode::SetExtendedRegister32
                                        : IR::Opcode::SetExtendedRegister64;
    Inst(opcode, {IR::Value(reg), value});
}
void IREmitter::ALUWritePC(const IR::Value& value) {
// This behaviour is ARM version-dependent.
// The below implementation is for ARMv6k

View file

@ -40,7 +40,9 @@ public:
IR::Value Imm32(u32 value);
IR::Value GetRegister(Reg source_reg);
IR::Value GetExtendedRegister(ExtReg source_reg);
void SetRegister(const Reg dest_reg, const IR::Value& value);
void SetExtendedRegister(const ExtReg dest_reg, const IR::Value& value);
void ALUWritePC(const IR::Value& value);
void BranchWritePC(const IR::Value& value);

View file

@ -5,7 +5,11 @@ OPCODE(Breakpoint, T::Void,
// ARM Context getters/setters
OPCODE(GetRegister, T::U32, T::RegRef )
OPCODE(GetExtendedRegister32, T::F32, T::ExtRegRef )
OPCODE(GetExtendedRegister64, T::F64, T::ExtRegRef )
OPCODE(SetRegister, T::Void, T::RegRef, T::U32 )
OPCODE(SetExtendedRegister32, T::Void, T::ExtRegRef, T::F32 )
OPCODE(SetExtendedRegister64, T::Void, T::ExtRegRef, T::F64 )
OPCODE(GetNFlag, T::U1, )
OPCODE(SetNFlag, T::Void, T::U1 )
OPCODE(GetZFlag, T::U1, )

View file

@ -63,11 +63,17 @@ public:
/// View and modify registers.
std::array<u32, 16>& Regs();
std::array<u32, 16> Regs() const;
std::array<u32, 64>& ExtRegs();
std::array<u32, 64> ExtRegs() const;
/// View and modify CPSR.
u32& Cpsr();
u32 Cpsr() const;
/// View and modify FPSCR.
u32 Fpscr() const;
void SetFpscr(u32 value) const;
/**
* Returns true if Jit::Run was called but hasn't returned yet.
* i.e.: We're in a callback.

View file

@ -90,6 +90,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit) {
interp_state.NumInstrsToExecute = 1;
interp_state.Reg = jit->Regs();
interp_state.ExtReg = jit->ExtRegs();
interp_state.Cpsr = jit->Cpsr();
interp_state.Reg[15] = pc;
@ -100,6 +101,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit) {
interp_state.Reg[15] &= T ? 0xFFFFFFFE : 0xFFFFFFFC;
jit->Regs() = interp_state.Reg;
jit->ExtRegs() = interp_state.ExtReg;
jit->Cpsr() = interp_state.Cpsr;
}