From 4b31ea25a7a50f6d9bbdef317fb49b8c59b879b3 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 6 Aug 2016 17:21:29 +0100 Subject: [PATCH] VFP: Implement VADD.{F32,F64} --- src/CMakeLists.txt | 1 + src/backend_x64/emit_x64.cpp | 103 +++++++++++++++++ src/backend_x64/jitstate.cpp | 38 +++--- src/backend_x64/jitstate.h | 2 + src/frontend/arm_types.h | 3 + src/frontend/decoder/vfp2.h | 109 ++++++++++++++++++ .../disassembler/disassembler_arm.cpp | 25 +++- src/frontend/ir/ir_emitter.cpp | 10 ++ src/frontend/ir/ir_emitter.h | 3 + src/frontend/ir/opcodes.inc | 4 + src/frontend/translate/translate_arm.cpp | 6 +- .../translate/translate_arm/translate_arm.h | 3 + src/frontend/translate/translate_arm/vfp2.cpp | 38 ++++++ tests/arm/fuzz_arm.cpp | 32 +++-- 14 files changed, 350 insertions(+), 27 deletions(-) create mode 100644 src/frontend/decoder/vfp2.h create mode 100644 src/frontend/translate/translate_arm/vfp2.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 20924e48..a4f1a5b0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,6 +25,7 @@ set(SRCS frontend/translate/translate_arm/multiply.cpp frontend/translate/translate_arm/parallel.cpp frontend/translate/translate_arm/reversal.cpp + frontend/translate/translate_arm/vfp2.cpp frontend/translate/translate_thumb.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/get_set_elimination_pass.cpp diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 98c0f416..f00b6ca8 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -119,6 +119,7 @@ void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) { void EmitX64::EmitGetExtendedRegister32(IR::Block& block, IR::Inst* inst) { Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef(); ASSERT(reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31); + X64Reg result = reg_alloc.DefRegister(inst, any_xmm); code->MOVSS(result, MJitStateExtReg(reg)); } @@ -1005,6 +1006,108 @@ void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* 
inst) { code->BSWAP(64, result); } +static void DenormalsAreZero32(XEmitter* code, X64Reg xmm_value, X64Reg gpr_scratch) { + // We need to report back whether we've found a denormal on input. + // SSE doesn't do this for us when SSE's DAZ is enabled. + code->MOVD_xmm(R(gpr_scratch), xmm_value); + code->AND(32, R(gpr_scratch), Imm32(0x7FFFFFFF)); + code->SUB(32, R(gpr_scratch), Imm32(1)); + code->CMP(32, R(gpr_scratch), Imm32(0x007FFFFE)); + auto fixup = code->J_CC(CC_A); + code->PXOR(xmm_value, R(xmm_value)); + code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_IDC)), Imm32(1 << 7)); + code->SetJumpTarget(fixup); +} + +static void DenormalsAreZero64(XEmitter* code, Routines* routines, X64Reg xmm_value, X64Reg gpr_scratch) { + code->MOVQ_xmm(R(gpr_scratch), xmm_value); + code->AND(64, R(gpr_scratch), routines->MFloatNonSignMask64()); + code->SUB(64, R(gpr_scratch), Imm32(1)); + code->CMP(64, R(gpr_scratch), routines->MFloatPenultimatePositiveDenormal64()); + auto fixup = code->J_CC(CC_A); + code->PXOR(xmm_value, R(xmm_value)); + code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_IDC)), Imm32(1 << 7)); + code->SetJumpTarget(fixup); +} + +static void FlushToZero32(XEmitter* code, X64Reg xmm_value, X64Reg gpr_scratch) { + code->MOVD_xmm(R(gpr_scratch), xmm_value); + code->AND(32, R(gpr_scratch), Imm32(0x7FFFFFFF)); + code->SUB(32, R(gpr_scratch), Imm32(1)); + code->CMP(32, R(gpr_scratch), Imm32(0x007FFFFE)); + auto fixup = code->J_CC(CC_A); + code->PXOR(xmm_value, R(xmm_value)); + code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_UFC)), Imm32(1 << 3)); + code->SetJumpTarget(fixup); +} + +static void FlushToZero64(XEmitter* code, Routines* routines, X64Reg xmm_value, X64Reg gpr_scratch) { + code->MOVQ_xmm(R(gpr_scratch), xmm_value); + code->AND(64, R(gpr_scratch), routines->MFloatNonSignMask64()); + code->SUB(64, R(gpr_scratch), Imm32(1)); + code->CMP(64, R(gpr_scratch), routines->MFloatPenultimatePositiveDenormal64()); + auto fixup = code->J_CC(CC_A); + 
code->PXOR(xmm_value, R(xmm_value)); + code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_UFC)), Imm32(1 << 3)); + code->SetJumpTarget(fixup); +} + +static void DefaultNaN32(XEmitter* code, Routines* routines, X64Reg xmm_value) { + code->UCOMISS(xmm_value, R(xmm_value)); + auto fixup = code->J_CC(CC_NP); + code->MOVAPS(xmm_value, routines->MFloatNaN32()); + code->SetJumpTarget(fixup); +} + +static void DefaultNaN64(XEmitter* code, Routines* routines, X64Reg xmm_value) { + code->UCOMISD(xmm_value, R(xmm_value)); + auto fixup = code->J_CC(CC_NP); + code->MOVAPS(xmm_value, routines->MFloatNaN64()); + code->SetJumpTarget(fixup); +} + +void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); + X64Reg operand = reg_alloc.UseRegister(b, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero32(code, result, gpr_scratch); + DenormalsAreZero32(code, operand, gpr_scratch); + } + code->ADDSS(result, R(operand)); + if (block.location.FPSCR_FTZ()) { + FlushToZero32(code, result, gpr_scratch); + } + if (block.location.FPSCR_DN()) { + DefaultNaN32(code, routines, result); + } +} + +void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); + X64Reg operand = reg_alloc.UseRegister(b, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero64(code, routines, result, gpr_scratch); + DenormalsAreZero64(code, routines, operand, gpr_scratch); + } + code->ADDSD(result, R(operand)); + if (block.location.FPSCR_FTZ()) { + FlushToZero64(code, routines, result, gpr_scratch); + } + if (block.location.FPSCR_DN()) { + DefaultNaN64(code, routines, result); + } +} + void 
EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) { reg_alloc.HostCall(inst, inst->GetArg(0)); diff --git a/src/backend_x64/jitstate.cpp b/src/backend_x64/jitstate.cpp index 2959dbe4..f7bba4dc 100644 --- a/src/backend_x64/jitstate.cpp +++ b/src/backend_x64/jitstate.cpp @@ -22,12 +22,12 @@ namespace BackendX64 { * UE bit 4 Underflow Flag * OE bit 3 Overflow Flag * ZE bit 2 Divide By Zero Flag - * DE bit 1 Denormal Flag + * DE bit 1 Denormal Flag // Appears to only be set when MXCSR.DAZ = 0 * IE bit 0 Invalid Operation Flag * * VFP FPSCR cumulative exception bits * ----------------------------------- - * IDC bit 7 Input Denormal cumulative exception bit + * IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1 * IXC bit 4 Inexact cumulative exception bit * UFC bit 3 Underflow cumulative exception bit * OFC bit 2 Overflow cumulative exception bit @@ -72,38 +72,44 @@ constexpr u32 FPSCR_MASK = 0b1111'00'111111'0'111'10011111'00000000; u32 JitState::Fpscr() const { ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0); + ASSERT((FPSCR_IDC & ~(1 << 7)) == 0); + ASSERT((FPSCR_UFC & ~(1 << 3)) == 0); u32 FPSCR = guest_FPSCR_flags; FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE - FPSCR |= (guest_MXCSR & 0b0000000000010) << 6; // IDC = DE FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE - - if (!Common::Bit<24>(FPSCR)) { - // ARM only sets IDC if FTZ == 1. 
- FPSCR &= ~(1 << 7); - FPSCR |= old_FPSCR & (1 << 7); - } + FPSCR |= FPSCR_IDC; + FPSCR |= FPSCR_UFC; return FPSCR; } void JitState::SetFpscr(u32 FPSCR) { old_FPSCR = FPSCR; - guest_FPSCR_flags = FPSCR & FPSCR_MASK; - guest_MXCSR = 0; + // Exception masks / enables + guest_MXCSR |= 0b1111110000000; // mask all + //guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE + //guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE + //guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE + + // RMode + const std::array MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; + guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3]; + + // Cumulative flags IOC, IXC, UFC, OFC, DZC guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC - guest_MXCSR |= ( FPSCR >> 6) & 0b0000000000010; // DE = IDC guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC - guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE - guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE - guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE + + // Cumulative flag IDC, UFC + FPSCR_IDC = FPSCR & (1 << 7); + FPSCR_UFC = FPSCR & (1 << 3); if (Common::Bit<24>(FPSCR)) { // VFP Flush to Zero - guest_MXCSR |= (1 << 15); // SSE Flush to Zero + //guest_MXCSR |= (1 << 15); // SSE Flush to Zero guest_MXCSR |= (1 << 6); // SSE Denormals are Zero } } diff --git a/src/backend_x64/jitstate.h b/src/backend_x64/jitstate.h index 7408a0e3..2efb23dd 100644 --- a/src/backend_x64/jitstate.h +++ b/src/backend_x64/jitstate.h @@ -30,6 +30,8 @@ struct JitState { u64 save_host_RSP = 0; s64 cycles_remaining = 0; + u32 FPSCR_IDC = 0; + u32 FPSCR_UFC = 0; u32 guest_FPSCR_flags = 0; u32 old_FPSCR = 0; u32 Fpscr() const; diff --git a/src/frontend/arm_types.h b/src/frontend/arm_types.h index 47033c52..cc128b87 100644 --- a/src/frontend/arm_types.h +++ b/src/frontend/arm_types.h @@ -11,6 +11,7 @@ #include #include +#include 
"common/bit_util.h" #include "common/common_types.h" namespace Dynarmic { @@ -96,6 +97,8 @@ struct LocationDescriptor { bool TFlag() const { return tflag; } bool EFlag() const { return eflag; } u32 FPSCR() const { return fpscr; } + bool FPSCR_FTZ() const { return Common::Bit<24>(fpscr); } + bool FPSCR_DN() const { return Common::Bit<25>(fpscr); } bool operator == (const LocationDescriptor& o) const { return std::tie(arm_pc, tflag, eflag, fpscr) == std::tie(o.arm_pc, o.tflag, o.eflag, o.fpscr); diff --git a/src/frontend/decoder/vfp2.h b/src/frontend/decoder/vfp2.h new file mode 100644 index 00000000..8989549e --- /dev/null +++ b/src/frontend/decoder/vfp2.h @@ -0,0 +1,109 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#pragma once + +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "frontend/decoder/decoder_detail.h" + +namespace Dynarmic { +namespace Arm { + +template +struct VFP2Matcher { + using CallRetT = typename mp::MemFnInfo::return_type; + + VFP2Matcher(const char* const name, u32 mask, u32 expect, std::function fn) + : name(name), mask(mask), expect(expect), fn(fn) {} + + /// Gets the name of this type of instruction. + const char* GetName() const { + return name; + } + + /** + * Tests to see if the instruction is this type of instruction. + * @param instruction The instruction to test + * @returns true if the instruction is + */ + bool Matches(u32 instruction) const { + return (instruction & mask) == expect; + } + + /** + * Calls the corresponding instruction handler on visitor for this type of instruction. + * @param v The visitor to use + * @param instruction The instruction to decode. 
+ */ + CallRetT call(Visitor& v, u32 instruction) const { + assert(Matches(instruction)); + return fn(v, instruction); + } + +private: + const char* name; + u32 mask, expect; + std::function fn; +}; + +template +boost::optional&> DecodeVFP2(u32 instruction) { + const static std::vector> table = { + +#define INST(fn, name, bitstring) detail::detail::GetMatcher(fn, name, bitstring) + + // cccc1110________----101-__-0---- + + // Floating-point three-register data processing instructions + // VMLA + // VMLS + // VNMLA + // VNMLS + // VNMUL + // VMUL + INST(&V::vfp2_VADD, "VADD", "cccc11100D11nnnndddd101zN0M0mmmm"), + // VSUB + // VDIV + + // Floating-point other instructions + // VMOV_imm + // VMOV_reg + // VABS + // VNEG + // VSQRT + // VCMP + // VCMPE + // VCVT + // VCVTR + + // Extension register load-store instructions + // VSTR + // VSTM + // VSTMDB + // VPUSH + // VLDR + // VLDM + // VLDMDB + // VPOP + +#undef INST + + }; + + const auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); }; + + auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + return iter != table.end() ? boost::make_optional&>(*iter) : boost::none; +} + +} // namespace Arm +} // namespace Dynarmic diff --git a/src/frontend/disassembler/disassembler_arm.cpp b/src/frontend/disassembler/disassembler_arm.cpp index eb813f15..1e2f3c20 100644 --- a/src/frontend/disassembler/disassembler_arm.cpp +++ b/src/frontend/disassembler/disassembler_arm.cpp @@ -11,6 +11,7 @@ #include "common/string_util.h" #include "frontend/arm_types.h" #include "frontend/decoder/arm.h" +#include "frontend/decoder/vfp2.h" namespace Dynarmic { namespace Arm { @@ -81,6 +82,16 @@ public: return ""; } + std::string FPRegStr(bool dp_operation, size_t base, bool bit) { + size_t reg_num; + if (dp_operation) { + reg_num = base + (bit ? 16 : 0); + } else { + reg_num = (base << 1) + (bit ? 1 : 0); + } + return Common::StringFromFormat("%c%zu", dp_operation ? 
'd' : 's', reg_num); + } + // Branch instructions std::string arm_B(Cond cond, Imm24 imm24) { s32 offset = Common::SignExtend<26, s32>(imm24 << 2) + 8; @@ -497,12 +508,22 @@ public: std::string arm_RFE() { return "ice"; } std::string arm_SETEND(bool E) { return "ice"; } std::string arm_SRS() { return "ice"; } + + // Floating point arithmetic instructions + std::string vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) { + return Common::StringFromFormat("vadd%s.%s %s, %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str()); + } }; std::string DisassembleArm(u32 instruction) { DisassemblerVisitor visitor; - auto decoder = DecodeArm(instruction); - return !decoder ? Common::StringFromFormat("UNKNOWN: %x", instruction) : decoder->call(visitor, instruction); + if (auto vfp_decoder = DecodeVFP2(instruction)) { + return vfp_decoder->call(visitor, instruction); + } else if (auto decoder = DecodeArm(instruction)) { + return decoder->call(visitor, instruction); + } else { + return Common::StringFromFormat("UNKNOWN: %x", instruction); + } } } // namespace Arm diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 75341ea9..7a702acf 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -282,6 +282,16 @@ IR::Value IREmitter::ByteReverseDual(const IR::Value& a) { return Inst(IR::Opcode::ByteReverseDual, {a}); } +IR::Value IREmitter::FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPAdd32, {a, b}); +} + +IR::Value IREmitter::FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPAdd64, {a, b}); +} + IR::Value IREmitter::ReadMemory8(const IR::Value& vaddr) { return Inst(IR::Opcode::ReadMemory8, {vaddr}); } diff --git a/src/frontend/ir/ir_emitter.h 
b/src/frontend/ir/ir_emitter.h index 060182b1..b102cf6e 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -93,6 +93,9 @@ public: IR::Value ByteReverseHalf(const IR::Value& a); IR::Value ByteReverseDual(const IR::Value& a); + IR::Value FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); + IR::Value FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); + IR::Value ReadMemory8(const IR::Value& vaddr); IR::Value ReadMemory16(const IR::Value& vaddr); IR::Value ReadMemory32(const IR::Value& vaddr); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index c216d2b7..68736934 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -58,6 +58,10 @@ OPCODE(ByteReverseWord, T::U32, T::U32 OPCODE(ByteReverseHalf, T::U16, T::U16 ) OPCODE(ByteReverseDual, T::U64, T::U64 ) +// Floating-point +OPCODE(FPAdd32, T::F32, T::F32, T::F32 ) +OPCODE(FPAdd64, T::F64, T::F64, T::F64 ) + // Memory access OPCODE(ReadMemory8, T::U8, T::U32 ) OPCODE(ReadMemory16, T::U16, T::U32 ) diff --git a/src/frontend/translate/translate_arm.cpp b/src/frontend/translate/translate_arm.cpp index 25f52676..16090d12 100644 --- a/src/frontend/translate/translate_arm.cpp +++ b/src/frontend/translate/translate_arm.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "frontend/arm_types.h" #include "frontend/decoder/arm.h" +#include "frontend/decoder/vfp2.h" #include "frontend/ir/ir.h" #include "frontend/translate/translate.h" #include "frontend/translate/translate_arm/translate_arm.h" @@ -22,8 +23,9 @@ IR::Block TranslateArm(LocationDescriptor descriptor, MemoryRead32FuncType memor const u32 arm_pc = visitor.ir.current_location.PC(); const u32 arm_instruction = (*memory_read_32)(arm_pc); - const auto decoder = DecodeArm(arm_instruction); - if (decoder) { + if (auto vfp_decoder = DecodeVFP2(arm_instruction)) { + should_continue = vfp_decoder->call(visitor, arm_instruction); + } else if (auto decoder = 
DecodeArm(arm_instruction)) { should_continue = decoder->call(visitor, arm_instruction); } else { should_continue = visitor.arm_UDF(); diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h index e04c2a73..d49e63f7 100644 --- a/src/frontend/translate/translate_arm/translate_arm.h +++ b/src/frontend/translate/translate_arm/translate_arm.h @@ -317,6 +317,9 @@ struct ArmTranslatorVisitor final { bool arm_RFE() { return InterpretThisInstruction(); } bool arm_SETEND(bool E) { return InterpretThisInstruction(); } bool arm_SRS() { return InterpretThisInstruction(); } + + // Floating-point three-register data processing instructions + bool vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm); }; } // namespace Arm diff --git a/src/frontend/translate/translate_arm/vfp2.cpp b/src/frontend/translate/translate_arm/vfp2.cpp new file mode 100644 index 00000000..7704c0d8 --- /dev/null +++ b/src/frontend/translate/translate_arm/vfp2.cpp @@ -0,0 +1,38 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include "translate_arm.h" + +namespace Dynarmic { +namespace Arm { + +static ExtReg ToExtReg(bool sz, size_t base, bool bit) { + if (sz) { + return static_cast(static_cast(ExtReg::D0) + base + (bit ? 16 : 0)); + } else { + return static_cast(static_cast(ExtReg::S0) + (base << 1) + (bit ? 
1 : 0)); + } +} + +bool ArmTranslatorVisitor::vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) { + // TODO: if (FPSCR.len || FPSCR.stride) return InterpretThisInstruction(); + ExtReg d = ToExtReg(sz, Vd, D); + ExtReg n = ToExtReg(sz, Vn, N); + ExtReg m = ToExtReg(sz, Vm, M); + // VADD.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m> + if (ConditionPassed(cond)) { + auto a = ir.GetExtendedRegister(n); + auto b = ir.GetExtendedRegister(m); + auto result = sz + ? ir.FPAdd64(a, b, true) + : ir.FPAdd32(a, b, true); + ir.SetExtendedRegister(d, result); + } + return true; +} + +} // namespace Arm +} // namespace Dynarmic diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp index d1c2431f..d075eb38 100644 --- a/tests/arm/fuzz_arm.cpp +++ b/tests/arm/fuzz_arm.cpp @@ -174,15 +174,13 @@ private: }; static bool DoesBehaviorMatch(const ARMul_State& interp, const Dynarmic::Jit& jit, const std::vector& interp_write_records, const std::vector& jit_write_records) { - const auto interp_regs = interp.Reg; - const auto jit_regs = jit.Regs(); - - return std::equal(interp_regs.begin(), interp_regs.end(), jit_regs.begin(), jit_regs.end()) + return interp.Reg == jit.Regs() + && interp.ExtReg == jit.ExtRegs() && interp.Cpsr == jit.Cpsr() + && interp.VFP[VFP_FPSCR] == jit.Fpscr() && interp_write_records == jit_write_records; } - void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_execute_count, const size_t run_count, const std::function instruction_generator) { // Prepare memory code_mem.fill(0xEAFFFFFE); // b +#0 @@ -199,14 +197,25 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe // Setup initial state + u32 initial_cpsr = 0x000001D0; + std::array initial_regs; std::generate_n(initial_regs.begin(), 15, []{ return RandInt(0, 0xFFFFFFFF); }); initial_regs[15] = 0; - interp.Cpsr = 0x000001D0; + std::array initial_extregs; + std::generate_n(initial_extregs.begin(), 64, []{ return RandInt(0, 
0xFFFFFFFF); }); + + u32 initial_fpscr = RandInt(0x0, 0x1) << 24; + + interp.Cpsr = initial_cpsr; interp.Reg = initial_regs; - jit.Cpsr() = 0x000001D0; + interp.ExtReg = initial_extregs; + interp.VFP[VFP_FPSCR] = initial_fpscr; + jit.Cpsr() = initial_cpsr; jit.Regs() = initial_regs; + jit.ExtRegs() = initial_extregs; + jit.SetFpscr(initial_fpscr); std::generate_n(code_mem.begin(), instruction_count, instruction_generator); @@ -239,6 +248,11 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe auto reg = Dynarmic::Arm::RegToString(static_cast(i)); printf("%4s: %08x\n", reg, initial_regs[i]); } + printf("CPSR: %08x\n", initial_cpsr); + printf("FPSCR:%08x\n", initial_fpscr); + for (int i = 0; i <= 63; i++) { + printf("S%3i: %08x\n", i, initial_extregs[i]); + } printf("\nFinal Register Listing: \n"); printf(" interp jit\n"); @@ -247,6 +261,10 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe printf("%4s: %08x %08x %s\n", reg, interp.Reg[i], jit.Regs()[i], interp.Reg[i] != jit.Regs()[i] ? "*" : ""); } printf("CPSR: %08x %08x %s\n", interp.Cpsr, jit.Cpsr(), interp.Cpsr != jit.Cpsr() ? "*" : ""); + printf("FPSCR:%08x %08x %s\n", interp.VFP[VFP_FPSCR], jit.Fpscr(), interp.VFP[VFP_FPSCR] != jit.Fpscr() ? "*" : ""); + for (int i = 0; i <= 63; i++) { + printf("S%3i: %08x %08x %s\n", i, interp.ExtReg[i], jit.ExtRegs()[i], interp.ExtReg[i] != jit.ExtRegs()[i] ? "*" : ""); + } printf("\nInterp Write Records:\n"); for (auto& record : interp_write_records) {