From 640ce48baa8b219bd948a56af737be58cd5a94e6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Fri, 5 Aug 2016 18:54:19 +0100 Subject: [PATCH] VFP: Implement {Get,Set}ExtendedRegister{32,64} --- src/CMakeLists.txt | 1 + src/backend_x64/emit_x64.cpp | 44 +++++++++++- src/backend_x64/interface_x64.cpp | 19 ++++- src/backend_x64/jitstate.cpp | 113 ++++++++++++++++++++++++++++++ src/backend_x64/jitstate.h | 10 ++- src/backend_x64/reg_alloc.cpp | 9 ++- src/backend_x64/routines.cpp | 5 ++ src/frontend/arm_types.h | 11 +++ src/frontend/ir/ir.cpp | 5 ++ src/frontend/ir/ir.h | 27 ++++--- src/frontend/ir/ir_emitter.cpp | 20 ++++++ src/frontend/ir/ir_emitter.h | 2 + src/frontend/ir/opcodes.inc | 4 ++ src/interface/interface.h | 6 ++ tests/arm/fuzz_arm.cpp | 2 + 15 files changed, 262 insertions(+), 16 deletions(-) create mode 100644 src/backend_x64/jitstate.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 31331121..20924e48 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,6 +3,7 @@ include_directories(.) 
set(SRCS backend_x64/emit_x64.cpp backend_x64/interface_x64.cpp + backend_x64/jitstate.cpp backend_x64/reg_alloc.cpp backend_x64/routines.cpp common/memory_util.cpp diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 2c93659e..dfe0e841 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -26,6 +26,18 @@ static OpArg MJitStateReg(Arm::Reg reg) { return MDisp(R15, offsetof(JitState, Reg) + sizeof(u32) * static_cast(reg)); } +static OpArg MJitStateExtReg(Arm::ExtReg reg) { + if (reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31) { + size_t index = static_cast(reg) - static_cast(Arm::ExtReg::S0); + return MDisp(R15, int(offsetof(JitState, ExtReg) + sizeof(u32) * index)); + } + if (reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31) { + size_t index = static_cast(reg) - static_cast(Arm::ExtReg::D0); + return MDisp(R15, int(offsetof(JitState, ExtReg) + sizeof(u64) * index)); + } + ASSERT_MSG(false, "Should never happen."); +} + static OpArg MJitStateCpsr() { return MDisp(R15, offsetof(JitState, Cpsr)); } @@ -104,6 +116,20 @@ void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) { code->MOV(32, R(result), MJitStateReg(reg)); } +void EmitX64::EmitGetExtendedRegister32(IR::Block& block, IR::Inst* inst) { + Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); + ASSERT(reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31); + X64Reg result = reg_alloc.DefRegister(inst, any_xmm); + code->MOVSS(result, MJitStateExtReg(reg)); +} + +void EmitX64::EmitGetExtendedRegister64(IR::Block&, IR::Inst* inst) { + Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); + ASSERT(reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31); + X64Reg result = reg_alloc.DefRegister(inst, any_xmm); + code->MOVSD(result, MJitStateExtReg(reg)); +} + void EmitX64::EmitSetRegister(IR::Block&, IR::Inst* inst) { Arm::Reg reg = inst->GetArg(0).GetRegRef(); IR::Value arg = inst->GetArg(1); @@ -115,6 +141,20 @@ void EmitX64::EmitSetRegister(IR::Block&, IR::Inst* inst) { 
} } +void EmitX64::EmitSetExtendedRegister32(IR::Block&, IR::Inst* inst) { + Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); + ASSERT(reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31); + X64Reg source = reg_alloc.UseRegister(inst->GetArg(1), any_xmm); + code->MOVSS(MJitStateExtReg(reg), source); +} + +void EmitX64::EmitSetExtendedRegister64(IR::Block&, IR::Inst* inst) { + Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); + ASSERT(reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31); + X64Reg source = reg_alloc.UseRegister(inst->GetArg(1), any_xmm); + code->MOVSD(MJitStateExtReg(reg), source); +} + void EmitX64::EmitGetNFlag(IR::Block&, IR::Inst* inst) { X64Reg result = reg_alloc.DefRegister(inst, any_gpr); code->MOV(32, R(result), MJitStateCpsr()); @@ -1128,7 +1168,7 @@ void EmitX64::EmitCondPrelude(Arm::Cond cond, CCFlags cc = EmitCond(code, cond); // TODO: Improve, maybe. - auto fixup = code->J_CC(cc); + auto fixup = code->J_CC(cc, true); EmitAddCycles(1); // TODO: Proper cycle count EmitTerminalLinkBlock(IR::Term::LinkBlock{cond_failed.get()}, initial_location); code->SetJumpTarget(fixup); @@ -1205,7 +1245,7 @@ void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, Arm::LocationDescript void EmitX64::EmitTerminalIf(IR::Term::If terminal, Arm::LocationDescriptor initial_location) { CCFlags cc = EmitCond(code, terminal.if_); - auto fixup = code->J_CC(cc); + auto fixup = code->J_CC(cc, true); EmitTerminal(terminal.else_, initial_location); code->SetJumpTarget(fixup); EmitTerminal(terminal.then_, initial_location); diff --git a/src/backend_x64/interface_x64.cpp b/src/backend_x64/interface_x64.cpp index 0e3ba207..23595746 100644 --- a/src/backend_x64/interface_x64.cpp +++ b/src/backend_x64/interface_x64.cpp @@ -48,7 +48,7 @@ struct Jit::Impl { bool TFlag = Common::Bit<5>(jit_state.Cpsr); bool EFlag = Common::Bit<9>(jit_state.Cpsr); - Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, jit_state.Fpscr}; + Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, 
jit_state.guest_FPSCR_flags}; CodePtr code_ptr = GetBasicBlock(descriptor)->code_ptr; return routines.RunCode(&jit_state, code_ptr, cycle_count); @@ -150,13 +150,30 @@ std::array Jit::Regs() const { return impl->jit_state.Reg; } +std::array& Jit::ExtRegs() { + return impl->jit_state.ExtReg; +} + +std::array Jit::ExtRegs() const { + return impl->jit_state.ExtReg; +} + u32& Jit::Cpsr() { return impl->jit_state.Cpsr; } + u32 Jit::Cpsr() const { return impl->jit_state.Cpsr; } +u32 Jit::Fpscr() const { + return impl->jit_state.Fpscr(); +} + +void Jit::SetFpscr(u32 value) const { + return impl->jit_state.SetFpscr(value); +} + std::string Jit::Disassemble(Arm::LocationDescriptor descriptor) { return impl->Disassemble(descriptor); } diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp new file mode 100644 index 00000000..2959dbe4 --- /dev/null +++ b/src/backend_x64/jitstate.cpp @@ -0,0 +1,113 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */ + +#include "backend_x64/jitstate.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Dynarmic { +namespace BackendX64 { + +/** + * Comparing MXCSR and FPSCR + * ========================= + * + * SSE MXCSR exception flags + * ------------------------- + * PE bit 5 Precision Flag + * UE bit 4 Underflow Flag + * OE bit 3 Overflow Flag + * ZE bit 2 Divide By Zero Flag + * DE bit 1 Denormal Flag + * IE bit 0 Invalid Operation Flag + * + * VFP FPSCR cumulative exception bits + * ----------------------------------- + * IDC bit 7 Input Denormal cumulative exception bit + * IXC bit 4 Inexact cumulative exception bit + * UFC bit 3 Underflow cumulative exception bit + * OFC bit 2 Overflow cumulative exception bit + * DZC bit 1 Division by Zero cumulative exception bit + * IOC bit 0 Invalid Operation cumulative exception bit + * + * SSE MXCSR exception masks + * ------------------------- + * PM bit 12 Precision Mask + * UM bit 11 Underflow Mask + * OM bit 10 Overflow Mask + * ZM bit 9 Divide By Zero Mask + * DM bit 8 Denormal Mask + * IM bit 7 Invalid Operation Mask + * + * VFP FPSCR exception trap enables + * -------------------------------- + * IDE bit 15 Input Denormal exception trap enable + * IXE bit 12 Inexact exception trap enable + * UFE bit 11 Underflow exception trap enable + * OFE bit 10 Overflow exception trap enable + * DZE bit 9 Division by Zero exception trap enable + * IOE bit 8 Invalid Operation exception trap enable + * + * SSE MXCSR mode bits + * ------------------- + * FZ bit 15 Flush To Zero + * DAZ bit 6 Denormals Are Zero + * RN bits 13-14 Round to {0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero} + * + * VFP FPSCR mode bits + * ------------------- + * DN bit 25 Default NaN + * FZ bit 24 Flush to Zero + * RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero} + * Stride bits 20-21 Vector stride + * Len bits 16-18 Vector length + */ + +// NZCV; QC (ASIMD only), AHP; DN, 
FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits +constexpr u32 FPSCR_MASK = 0b1111'00'111111'0'111'10011111'00000000; + +u32 JitState::Fpscr() const { + ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0); + + u32 FPSCR = guest_FPSCR_flags; + FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE + FPSCR |= (guest_MXCSR & 0b0000000000010) << 6; // IDC = DE + FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE + + if (!Common::Bit<24>(FPSCR)) { + // ARM only sets IDC if FTZ == 1. + FPSCR &= ~(1 << 7); + FPSCR |= old_FPSCR & (1 << 7); + } + + return FPSCR; +} + +void JitState::SetFpscr(u32 FPSCR) { + old_FPSCR = FPSCR; + + guest_FPSCR_flags = FPSCR & FPSCR_MASK; + + guest_MXCSR = 0; + + guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC + guest_MXCSR |= ( FPSCR >> 6) & 0b0000000000010; // DE = IDC + guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC + guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE + guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE + guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE + + if (Common::Bit<24>(FPSCR)) { + // VFP Flush to Zero + guest_MXCSR |= (1 << 15); // SSE Flush to Zero + guest_MXCSR |= (1 << 6); // SSE Denormals are Zero + } +} + + +} // namespace BackendX64 +} // namespace Dynarmic diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 89c1f8a9..7408a0e3 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -3,6 +3,7 @@ * This software may be used and distributed according to the terms of the GNU * General Public License version 2 or any later version. */ + #pragma once #include @@ -19,13 +20,20 @@ struct JitState { std::array Reg{}; // Current register file. // TODO: Mode-specific register sets unimplemented. - u32 Fpscr = 0; + alignas(u64) std::array ExtReg{}; // Extension registers. std::array Spill{}; // Spill. 
// For internal use (See: Routines::RunCode) + u32 guest_MXCSR = 0; + u32 save_host_MXCSR = 0; u64 save_host_RSP = 0; s64 cycles_remaining = 0; + + u32 guest_FPSCR_flags = 0; + u32 old_FPSCR = 0; + u32 Fpscr() const; + void SetFpscr(u32 FPSCR); }; using CodePtr = const u8*; diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 1cd4d47c..9671db28 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -111,9 +111,14 @@ Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, Hos } } - Gen::X64Reg use_reg = UseRegister(use_inst, any_gpr); + bool is_floating_point = use_inst->GetType() == IR::Type::F32 || use_inst->GetType() == IR::Type::F64; + Gen::X64Reg use_reg = UseRegister(use_inst, is_floating_point ? any_xmm : any_gpr); Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations); - code->MOV(64, Gen::R(def_reg), Gen::R(use_reg)); + if (is_floating_point) { + code->MOVAPD(def_reg, Gen::R(use_reg)); + } else { + code->MOV(64, Gen::R(def_reg), Gen::R(use_reg)); + } return def_reg; } diff --git a/src/backend_x64/routines.cpp b/src/backend_x64/routines.cpp index 7c84ef01..690fb4ef 100644 --- a/src/backend_x64/routines.cpp +++ b/src/backend_x64/routines.cpp @@ -41,12 +41,17 @@ void Routines::GenRunCode() { MOV(64, R(R15), R(ABI_PARAM1)); MOV(64, MDisp(R15, offsetof(JitState, save_host_RSP)), R(RSP)); + STMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR))); + LDMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR))); JMPptr(R(ABI_PARAM2)); } void Routines::GenReturnFromRunCode(XEmitter* code) const { + code->STMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR))); + code->LDMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR))); code->MOV(64, R(RSP), MDisp(R15, offsetof(JitState, save_host_RSP))); + code->ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); code->RET(); } diff --git a/src/frontend/arm_types.h b/src/frontend/arm_types.h index b1ed29df..47033c52 100644 --- 
a/src/frontend/arm_types.h +++ b/src/frontend/arm_types.h @@ -51,6 +51,17 @@ inline const char* RegToString(Reg reg) { return reg_strs.at(static_cast(reg)); } +enum class ExtReg { + S0, S1, S2, S3, S4, S5, S6, S7, + S8, S9, S10, S11, S12, S13, S14, S15, + S16, S17, S18, S19, S20, S21, S22, S23, + S24, S25, S26, S27, S28, S29, S30, S31, + D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, +}; + using Imm3 = u32; using Imm4 = u32; using Imm5 = u32; diff --git a/src/frontend/ir/ir.cpp b/src/frontend/ir/ir.cpp index c81b5118..e30eefda 100644 --- a/src/frontend/ir/ir.cpp +++ b/src/frontend/ir/ir.cpp @@ -78,6 +78,11 @@ Arm::Reg Value::GetRegRef() const { return inner.imm_regref; } +Arm::ExtReg Value::GetExtRegRef() const { + DEBUG_ASSERT(type == Type::ExtRegRef); + return inner.imm_extregref; +} + Inst* Value::GetInst() const { DEBUG_ASSERT(type == Type::Opaque); return inner.inst; diff --git a/src/frontend/ir/ir.h b/src/frontend/ir/ir.h index c9871da5..1097d8eb 100644 --- a/src/frontend/ir/ir.h +++ b/src/frontend/ir/ir.h @@ -33,16 +33,17 @@ namespace IR { // A basic block is represented as an IR::Block. 
enum class Type { - Void = 1 << 0, - RegRef = 1 << 1, - Opaque = 1 << 2, - U1 = 1 << 3, - U8 = 1 << 4, - U16 = 1 << 5, - U32 = 1 << 6, - U64 = 1 << 7, - F32 = 1 << 8, - F64 = 1 << 9, + Void = 1 << 0, + RegRef = 1 << 1, + ExtRegRef = 1 << 2, + Opaque = 1 << 3, + U1 = 1 << 4, + U8 = 1 << 5, + U16 = 1 << 6, + U32 = 1 << 7, + U64 = 1 << 8, + F32 = 1 << 9, + F64 = 1 << 10, }; Type GetTypeOf(Opcode op); @@ -72,6 +73,10 @@ public: inner.imm_regref = value; } + explicit Value(Arm::ExtReg value) : type(Type::ExtRegRef) { + inner.imm_extregref = value; + } + explicit Value(bool value) : type(Type::U1) { inner.imm_u1 = value; } @@ -90,6 +95,7 @@ public: Inst* GetInst() const; Arm::Reg GetRegRef() const; + Arm::ExtReg GetExtRegRef() const; bool GetU1() const; u8 GetU8() const; u32 GetU32() const; @@ -100,6 +106,7 @@ private: union { Inst* inst; // type == Type::Opaque Arm::Reg imm_regref; + Arm::ExtReg imm_extregref; bool imm_u1; u8 imm_u8; u32 imm_u32; diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 2182c1bb..75341ea9 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -43,11 +43,31 @@ IR::Value IREmitter::GetRegister(Reg reg) { return Inst(IR::Opcode::GetRegister, { IR::Value(reg) }); } +IR::Value IREmitter::GetExtendedRegister(ExtReg reg) { + if (reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31) { + return Inst(IR::Opcode::GetExtendedRegister32, {IR::Value(reg)}); + } else if (reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31) { + return Inst(IR::Opcode::GetExtendedRegister64, {IR::Value(reg)}); + } else { + ASSERT_MSG(false, "Invalid reg."); + } +} + void IREmitter::SetRegister(const Reg reg, const IR::Value& value) { ASSERT(reg != Reg::PC); Inst(IR::Opcode::SetRegister, { IR::Value(reg), value }); } +void IREmitter::SetExtendedRegister(const ExtReg reg, const IR::Value& value) { + if (reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31) { + Inst(IR::Opcode::SetExtendedRegister32, {IR::Value(reg), value}); + 
} else if (reg >= Arm::ExtReg::D0 && reg <= Arm::ExtReg::D31) { + Inst(IR::Opcode::SetExtendedRegister64, {IR::Value(reg), value}); + } else { + ASSERT_MSG(false, "Invalid reg."); + } +} + void IREmitter::ALUWritePC(const IR::Value& value) { // This behaviour is ARM version-dependent. // The below implementation is for ARMv6k diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index a2e6cfc7..060182b1 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -40,7 +40,9 @@ public: IR::Value Imm32(u32 value); IR::Value GetRegister(Reg source_reg); + IR::Value GetExtendedRegister(ExtReg source_reg); void SetRegister(const Reg dest_reg, const IR::Value& value); + void SetExtendedRegister(const ExtReg dest_reg, const IR::Value& value); void ALUWritePC(const IR::Value& value); void BranchWritePC(const IR::Value& value); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 03c7d121..c216d2b7 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -5,7 +5,11 @@ OPCODE(Breakpoint, T::Void, // ARM Context getters/setters OPCODE(GetRegister, T::U32, T::RegRef ) +OPCODE(GetExtendedRegister32, T::F32, T::ExtRegRef ) +OPCODE(GetExtendedRegister64, T::F64, T::ExtRegRef ) OPCODE(SetRegister, T::Void, T::RegRef, T::U32 ) +OPCODE(SetExtendedRegister32, T::Void, T::ExtRegRef, T::F32 ) +OPCODE(SetExtendedRegister64, T::Void, T::ExtRegRef, T::F64 ) OPCODE(GetNFlag, T::U1, ) OPCODE(SetNFlag, T::Void, T::U1 ) OPCODE(GetZFlag, T::U1, ) diff --git a/src/interface/interface.h b/src/interface/interface.h index b178b0aa..aa2f32cb 100644 --- a/src/interface/interface.h +++ b/src/interface/interface.h @@ -63,11 +63,17 @@ public: /// View and modify registers. std::array& Regs(); std::array Regs() const; + std::array& ExtRegs(); + std::array ExtRegs() const; /// View and modify CPSR. u32& Cpsr(); u32 Cpsr() const; + /// View and modify FPSCR. 
+ u32 Fpscr() const; + void SetFpscr(u32 value) const; + /** * Returns true if Jit::Run was called but hasn't returned yet. * i.e.: We're in a callback. diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp index 94d03559..40a7e39b 100644 --- a/tests/arm/fuzz_arm.cpp +++ b/tests/arm/fuzz_arm.cpp @@ -90,6 +90,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit) { interp_state.NumInstrsToExecute = 1; interp_state.Reg = jit->Regs(); + interp_state.ExtReg = jit->ExtRegs(); interp_state.Cpsr = jit->Cpsr(); interp_state.Reg[15] = pc; @@ -100,6 +101,7 @@ static void InterpreterFallback(u32 pc, Dynarmic::Jit* jit) { interp_state.Reg[15] &= T ? 0xFFFFFFFE : 0xFFFFFFFC; jit->Regs() = interp_state.Reg; + jit->ExtRegs() = interp_state.ExtReg; jit->Cpsr() = interp_state.Cpsr; }