VFP: Implement VADD.{F32,F64}
This commit is contained in:
parent
8ff414ee0e
commit
4b31ea25a7
14 changed files with 350 additions and 27 deletions
|
@ -25,6 +25,7 @@ set(SRCS
|
||||||
frontend/translate/translate_arm/multiply.cpp
|
frontend/translate/translate_arm/multiply.cpp
|
||||||
frontend/translate/translate_arm/parallel.cpp
|
frontend/translate/translate_arm/parallel.cpp
|
||||||
frontend/translate/translate_arm/reversal.cpp
|
frontend/translate/translate_arm/reversal.cpp
|
||||||
|
frontend/translate/translate_arm/vfp2.cpp
|
||||||
frontend/translate/translate_thumb.cpp
|
frontend/translate/translate_thumb.cpp
|
||||||
ir_opt/dead_code_elimination_pass.cpp
|
ir_opt/dead_code_elimination_pass.cpp
|
||||||
ir_opt/get_set_elimination_pass.cpp
|
ir_opt/get_set_elimination_pass.cpp
|
||||||
|
|
|
@ -119,6 +119,7 @@ void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) {
|
||||||
void EmitX64::EmitGetExtendedRegister32(IR::Block& block, IR::Inst* inst) {
|
void EmitX64::EmitGetExtendedRegister32(IR::Block& block, IR::Inst* inst) {
|
||||||
Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
|
Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
|
||||||
ASSERT(reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31);
|
ASSERT(reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31);
|
||||||
|
|
||||||
X64Reg result = reg_alloc.DefRegister(inst, any_xmm);
|
X64Reg result = reg_alloc.DefRegister(inst, any_xmm);
|
||||||
code->MOVSS(result, MJitStateExtReg(reg));
|
code->MOVSS(result, MJitStateExtReg(reg));
|
||||||
}
|
}
|
||||||
|
@ -1005,6 +1006,108 @@ void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) {
|
||||||
code->BSWAP(64, result);
|
code->BSWAP(64, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits host code that replaces a single-precision denormal held in xmm_value
// with zero and records the event by storing (1 << 7) into JitState::FPSCR_IDC
// (addressed relative to R15). gpr_scratch is overwritten.
static void DenormalsAreZero32(XEmitter* code, X64Reg xmm_value, X64Reg gpr_scratch) {
|
||||||
|
// We need to report back whether we've found a denormal on input.
|
||||||
|
// SSE doesn't do this for us when SSE's DAZ is enabled.
|
||||||
|
// Copy the raw bit pattern of the value into the scratch GPR.
code->MOVD_xmm(R(gpr_scratch), xmm_value);
|
||||||
|
// Drop the sign bit so only the magnitude is tested.
code->AND(32, R(gpr_scratch), Imm32(0x7FFFFFFF));
|
||||||
|
// Subtracting 1 wraps a zero magnitude to 0xFFFFFFFF, so the single unsigned
// compare below accepts exactly the magnitudes 0x00000001..0x007FFFFF.
code->SUB(32, R(gpr_scratch), Imm32(1));
|
||||||
|
code->CMP(32, R(gpr_scratch), Imm32(0x007FFFFE));
|
||||||
|
// Unsigned "above": the value is outside that range, so skip the fixup.
auto fixup = code->J_CC(CC_A);
|
||||||
|
// PXOR of a register with itself clears the whole register, sign bit included.
// NOTE(review): a negative denormal therefore becomes +0 — confirm this is intended.
code->PXOR(xmm_value, R(xmm_value));
|
||||||
|
code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_IDC)), Imm32(1 << 7));
|
||||||
|
code->SetJumpTarget(fixup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits host code that replaces a double-precision denormal held in xmm_value
// with zero and records the event by storing (1 << 7) into JitState::FPSCR_IDC
// (addressed relative to R15). gpr_scratch is overwritten.
static void DenormalsAreZero64(XEmitter* code, Routines* routines, X64Reg xmm_value, X64Reg gpr_scratch) {
|
||||||
|
// Copy the raw 64-bit pattern of the value into the scratch GPR.
code->MOVQ_xmm(R(gpr_scratch), xmm_value);
|
||||||
|
// presumably MFloatNonSignMask64() is an operand holding 0x7FFFFFFFFFFFFFFF — TODO confirm
code->AND(64, R(gpr_scratch), routines->MFloatNonSignMask64());
|
||||||
|
// Subtracting 1 wraps a zero magnitude to all-ones so it fails the range test below.
code->SUB(64, R(gpr_scratch), Imm32(1));
|
||||||
|
// presumably compares against 0x000FFFFFFFFFFFFE (largest denormal minus one) — TODO confirm
code->CMP(64, R(gpr_scratch), routines->MFloatPenultimatePositiveDenormal64());
|
||||||
|
// Unsigned "above": skip the fixup.
auto fixup = code->J_CC(CC_A);
|
||||||
|
// PXOR of a register with itself clears the whole register, sign bit included.
code->PXOR(xmm_value, R(xmm_value));
|
||||||
|
code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_IDC)), Imm32(1 << 7));
|
||||||
|
code->SetJumpTarget(fixup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits host code that replaces a single-precision denormal result held in
// xmm_value with zero and records the event by storing (1 << 3) into
// JitState::FPSCR_UFC (addressed relative to R15). gpr_scratch is overwritten.
static void FlushToZero32(XEmitter* code, X64Reg xmm_value, X64Reg gpr_scratch) {
|
||||||
|
// Copy the raw bit pattern of the value into the scratch GPR.
code->MOVD_xmm(R(gpr_scratch), xmm_value);
|
||||||
|
// Drop the sign bit so only the magnitude is tested.
code->AND(32, R(gpr_scratch), Imm32(0x7FFFFFFF));
|
||||||
|
// Subtracting 1 wraps a zero magnitude to 0xFFFFFFFF, so the single unsigned
// compare below accepts exactly the magnitudes 0x00000001..0x007FFFFF.
code->SUB(32, R(gpr_scratch), Imm32(1));
|
||||||
|
code->CMP(32, R(gpr_scratch), Imm32(0x007FFFFE));
|
||||||
|
// Unsigned "above": the value is outside that range, so skip the fixup.
auto fixup = code->J_CC(CC_A);
|
||||||
|
// PXOR of a register with itself clears the whole register, sign bit included.
code->PXOR(xmm_value, R(xmm_value));
|
||||||
|
code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_UFC)), Imm32(1 << 3));
|
||||||
|
code->SetJumpTarget(fixup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits host code that replaces a double-precision denormal result held in
// xmm_value with zero and records the event by storing (1 << 3) into
// JitState::FPSCR_UFC (addressed relative to R15). gpr_scratch is overwritten.
static void FlushToZero64(XEmitter* code, Routines* routines, X64Reg xmm_value, X64Reg gpr_scratch) {
|
||||||
|
// Copy the raw 64-bit pattern of the value into the scratch GPR.
code->MOVQ_xmm(R(gpr_scratch), xmm_value);
|
||||||
|
// presumably MFloatNonSignMask64() is an operand holding 0x7FFFFFFFFFFFFFFF — TODO confirm
code->AND(64, R(gpr_scratch), routines->MFloatNonSignMask64());
|
||||||
|
// Subtracting 1 wraps a zero magnitude to all-ones so it fails the range test below.
code->SUB(64, R(gpr_scratch), Imm32(1));
|
||||||
|
// presumably compares against 0x000FFFFFFFFFFFFE (largest denormal minus one) — TODO confirm
code->CMP(64, R(gpr_scratch), routines->MFloatPenultimatePositiveDenormal64());
|
||||||
|
// Unsigned "above": skip the fixup.
auto fixup = code->J_CC(CC_A);
|
||||||
|
// PXOR of a register with itself clears the whole register, sign bit included.
code->PXOR(xmm_value, R(xmm_value));
|
||||||
|
code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_UFC)), Imm32(1 << 3));
|
||||||
|
code->SetJumpTarget(fixup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits host code that overwrites xmm_value with the constant referenced by
// routines->MFloatNaN32() when xmm_value holds a single-precision NaN.
static void DefaultNaN32(XEmitter* code, Routines* routines, X64Reg xmm_value) {
|
||||||
|
// Comparing a register with itself is unordered (parity flag set) only for NaN.
code->UCOMISS(xmm_value, R(xmm_value));
|
||||||
|
// Parity clear: not a NaN, leave the value untouched.
auto fixup = code->J_CC(CC_NP);
|
||||||
|
// NOTE(review): MOVAPS is a 128-bit aligned move — assumes MFloatNaN32() refers to
// 16-byte-aligned storage that is at least 16 bytes wide; confirm.
code->MOVAPS(xmm_value, routines->MFloatNaN32());
|
||||||
|
code->SetJumpTarget(fixup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits host code that overwrites xmm_value with the constant referenced by
// routines->MFloatNaN64() when xmm_value holds a double-precision NaN.
static void DefaultNaN64(XEmitter* code, Routines* routines, X64Reg xmm_value) {
|
||||||
|
// Comparing a register with itself is unordered (parity flag set) only for NaN.
code->UCOMISD(xmm_value, R(xmm_value));
|
||||||
|
// Parity clear: not a NaN, leave the value untouched.
auto fixup = code->J_CC(CC_NP);
|
||||||
|
// NOTE(review): MOVAPS is a 128-bit aligned move — assumes MFloatNaN64() refers to
// 16-byte-aligned storage that is at least 16 bytes wide; confirm.
code->MOVAPS(xmm_value, routines->MFloatNaN64());
|
||||||
|
code->SetJumpTarget(fixup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits host code for the FPAdd32 IR instruction: a single-precision ADDSS of
// argument 0 and argument 1. Whether the denormal/NaN fixups are emitted is
// decided at emit time from the block's location descriptor (FPSCR_FTZ / FPSCR_DN).
void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
|
||||||
|
IR::Value a = inst->GetArg(0);
|
||||||
|
IR::Value b = inst->GetArg(1);
|
||||||
|
|
||||||
|
// presumably UseDefRegister yields a's register and makes it this instruction's result — TODO confirm
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
|
||||||
|
// NOTE(review): operand is acquired with UseRegister, yet DenormalsAreZero32 below can
// overwrite it (PXOR). Confirm the allocator permits clobbering a use-only register;
// otherwise a later user of b would observe zero instead of the original denormal.
X64Reg operand = reg_alloc.UseRegister(b, any_xmm);
|
||||||
|
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
|
||||||
|
|
||||||
|
// Flush-to-zero mode: treat denormal inputs as zero before adding.
if (block.location.FPSCR_FTZ()) {
|
||||||
|
DenormalsAreZero32(code, result, gpr_scratch);
|
||||||
|
DenormalsAreZero32(code, operand, gpr_scratch);
|
||||||
|
}
|
||||||
|
code->ADDSS(result, R(operand));
|
||||||
|
// Flush-to-zero mode: a denormal sum is replaced by zero as well.
if (block.location.FPSCR_FTZ()) {
|
||||||
|
FlushToZero32(code, result, gpr_scratch);
|
||||||
|
}
|
||||||
|
// Default-NaN mode: any NaN result is replaced by the default NaN constant.
if (block.location.FPSCR_DN()) {
|
||||||
|
DefaultNaN32(code, routines, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits host code for the FPAdd64 IR instruction: a double-precision ADDSD of
// argument 0 and argument 1. Whether the denormal/NaN fixups are emitted is
// decided at emit time from the block's location descriptor (FPSCR_FTZ / FPSCR_DN).
void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
|
||||||
|
IR::Value a = inst->GetArg(0);
|
||||||
|
IR::Value b = inst->GetArg(1);
|
||||||
|
|
||||||
|
// presumably UseDefRegister yields a's register and makes it this instruction's result — TODO confirm
X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
|
||||||
|
// NOTE(review): operand is acquired with UseRegister, yet DenormalsAreZero64 below can
// overwrite it (PXOR). Confirm the allocator permits clobbering a use-only register;
// otherwise a later user of b would observe zero instead of the original denormal.
X64Reg operand = reg_alloc.UseRegister(b, any_xmm);
|
||||||
|
X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
|
||||||
|
|
||||||
|
// Flush-to-zero mode: treat denormal inputs as zero before adding.
if (block.location.FPSCR_FTZ()) {
|
||||||
|
DenormalsAreZero64(code, routines, result, gpr_scratch);
|
||||||
|
DenormalsAreZero64(code, routines, operand, gpr_scratch);
|
||||||
|
}
|
||||||
|
code->ADDSD(result, R(operand));
|
||||||
|
// Flush-to-zero mode: a denormal sum is replaced by zero as well.
if (block.location.FPSCR_FTZ()) {
|
||||||
|
FlushToZero64(code, routines, result, gpr_scratch);
|
||||||
|
}
|
||||||
|
// Default-NaN mode: any NaN result is replaced by the default NaN constant.
if (block.location.FPSCR_DN()) {
|
||||||
|
DefaultNaN64(code, routines, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(inst, inst->GetArg(0));
|
reg_alloc.HostCall(inst, inst->GetArg(0));
|
||||||
|
|
||||||
|
|
|
@ -22,12 +22,12 @@ namespace BackendX64 {
|
||||||
* UE bit 4 Underflow Flag
|
* UE bit 4 Underflow Flag
|
||||||
* OE bit 3 Overflow Flag
|
* OE bit 3 Overflow Flag
|
||||||
* ZE bit 2 Divide By Zero Flag
|
* ZE bit 2 Divide By Zero Flag
|
||||||
* DE bit 1 Denormal Flag
|
* DE bit 1 Denormal Flag // Appears to only be set when MXCSR.DAZ = 0
|
||||||
* IE bit 0 Invalid Operation Flag
|
* IE bit 0 Invalid Operation Flag
|
||||||
*
|
*
|
||||||
* VFP FPSCR cumulative exception bits
|
* VFP FPSCR cumulative exception bits
|
||||||
* -----------------------------------
|
* -----------------------------------
|
||||||
* IDC bit 7 Input Denormal cumulative exception bit
|
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1
|
||||||
* IXC bit 4 Inexact cumulative exception bit
|
* IXC bit 4 Inexact cumulative exception bit
|
||||||
* UFC bit 3 Underflow cumulative exception bit
|
* UFC bit 3 Underflow cumulative exception bit
|
||||||
* OFC bit 2 Overflow cumulative exception bit
|
* OFC bit 2 Overflow cumulative exception bit
|
||||||
|
@ -72,38 +72,44 @@ constexpr u32 FPSCR_MASK = 0b1111'00'111111'0'111'10011111'00000000;
|
||||||
|
|
||||||
u32 JitState::Fpscr() const {
|
u32 JitState::Fpscr() const {
|
||||||
ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0);
|
ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0);
|
||||||
|
ASSERT((FPSCR_IDC & ~(1 << 7)) == 0);
|
||||||
|
ASSERT((FPSCR_UFC & ~(1 << 3)) == 0);
|
||||||
|
|
||||||
u32 FPSCR = guest_FPSCR_flags;
|
u32 FPSCR = guest_FPSCR_flags;
|
||||||
FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
|
FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
|
||||||
FPSCR |= (guest_MXCSR & 0b0000000000010) << 6; // IDC = DE
|
|
||||||
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
|
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
|
||||||
|
FPSCR |= FPSCR_IDC;
|
||||||
if (!Common::Bit<24>(FPSCR)) {
|
FPSCR |= FPSCR_UFC;
|
||||||
// ARM only sets IDC if FTZ == 1.
|
|
||||||
FPSCR &= ~(1 << 7);
|
|
||||||
FPSCR |= old_FPSCR & (1 << 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
return FPSCR;
|
return FPSCR;
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitState::SetFpscr(u32 FPSCR) {
|
void JitState::SetFpscr(u32 FPSCR) {
|
||||||
old_FPSCR = FPSCR;
|
old_FPSCR = FPSCR;
|
||||||
|
|
||||||
guest_FPSCR_flags = FPSCR & FPSCR_MASK;
|
guest_FPSCR_flags = FPSCR & FPSCR_MASK;
|
||||||
|
|
||||||
guest_MXCSR = 0;
|
guest_MXCSR = 0;
|
||||||
|
|
||||||
|
// Exception masks / enables
|
||||||
|
guest_MXCSR |= 0b1111110000000; // mask all
|
||||||
|
//guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE
|
||||||
|
//guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE
|
||||||
|
//guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE
|
||||||
|
|
||||||
|
// RMode
|
||||||
|
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
|
||||||
|
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
|
||||||
|
|
||||||
|
// Cumulative flags IOC, IXC, UFC, OFC, DZC
|
||||||
guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC
|
guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC
|
||||||
guest_MXCSR |= ( FPSCR >> 6) & 0b0000000000010; // DE = IDC
|
|
||||||
guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
|
guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
|
||||||
guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE
|
|
||||||
guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE
|
// Cumulative flags IDC, UFC
|
||||||
guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE
|
FPSCR_IDC = FPSCR & (1 << 7);
|
||||||
|
FPSCR_UFC = FPSCR & (1 << 3);
|
||||||
|
|
||||||
if (Common::Bit<24>(FPSCR)) {
|
if (Common::Bit<24>(FPSCR)) {
|
||||||
// VFP Flush to Zero
|
// VFP Flush to Zero
|
||||||
guest_MXCSR |= (1 << 15); // SSE Flush to Zero
|
//guest_MXCSR |= (1 << 15); // SSE Flush to Zero
|
||||||
guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
|
guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,8 @@ struct JitState {
|
||||||
u64 save_host_RSP = 0;
|
u64 save_host_RSP = 0;
|
||||||
s64 cycles_remaining = 0;
|
s64 cycles_remaining = 0;
|
||||||
|
|
||||||
|
u32 FPSCR_IDC = 0;
|
||||||
|
u32 FPSCR_UFC = 0;
|
||||||
u32 guest_FPSCR_flags = 0;
|
u32 guest_FPSCR_flags = 0;
|
||||||
u32 old_FPSCR = 0;
|
u32 old_FPSCR = 0;
|
||||||
u32 Fpscr() const;
|
u32 Fpscr() const;
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include "common/bit_util.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
|
@ -96,6 +97,8 @@ struct LocationDescriptor {
|
||||||
bool TFlag() const { return tflag; }
|
bool TFlag() const { return tflag; }
|
||||||
bool EFlag() const { return eflag; }
|
bool EFlag() const { return eflag; }
|
||||||
u32 FPSCR() const { return fpscr; }
|
u32 FPSCR() const { return fpscr; }
|
||||||
|
bool FPSCR_FTZ() const { return Common::Bit<24>(fpscr); }
|
||||||
|
bool FPSCR_DN() const { return Common::Bit<25>(fpscr); }
|
||||||
|
|
||||||
bool operator == (const LocationDescriptor& o) const {
|
bool operator == (const LocationDescriptor& o) const {
|
||||||
return std::tie(arm_pc, tflag, eflag, fpscr) == std::tie(o.arm_pc, o.tflag, o.eflag, o.fpscr);
|
return std::tie(arm_pc, tflag, eflag, fpscr) == std::tie(o.arm_pc, o.tflag, o.eflag, o.fpscr);
|
||||||
|
|
109
src/frontend/decoder/vfp2.h
Normal file
109
src/frontend/decoder/vfp2.h
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <functional>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <boost/optional.hpp>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "frontend/decoder/decoder_detail.h"
|
||||||
|
|
||||||
|
namespace Dynarmic {
|
||||||
|
namespace Arm {
|
||||||
|
|
||||||
|
template <typename Visitor>
|
||||||
|
struct VFP2Matcher {
|
||||||
|
using CallRetT = typename mp::MemFnInfo<decltype(&Visitor::vfp2_VADD)>::return_type;
|
||||||
|
|
||||||
|
VFP2Matcher(const char* const name, u32 mask, u32 expect, std::function<CallRetT(Visitor&, u32)> fn)
|
||||||
|
: name(name), mask(mask), expect(expect), fn(fn) {}
|
||||||
|
|
||||||
|
/// Gets the name of this type of instruction.
|
||||||
|
const char* GetName() const {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests to see if the instruction is this type of instruction.
|
||||||
|
* @param instruction The instruction to test
|
||||||
|
* @returns true if the instruction is this type of instruction, false otherwise
|
||||||
|
*/
|
||||||
|
bool Matches(u32 instruction) const {
|
||||||
|
return (instruction & mask) == expect;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calls the corresponding instruction handler on visitor for this type of instruction.
|
||||||
|
* @param v The visitor to use
|
||||||
|
* @param instruction The instruction to decode.
|
||||||
|
*/
|
||||||
|
CallRetT call(Visitor& v, u32 instruction) const {
|
||||||
|
assert(Matches(instruction));
|
||||||
|
return fn(v, instruction);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const char* name;
|
||||||
|
u32 mask, expect;
|
||||||
|
std::function<CallRetT(Visitor&, u32)> fn;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename V>
|
||||||
|
boost::optional<const VFP2Matcher<V>&> DecodeVFP2(u32 instruction) {
|
||||||
|
const static std::vector<VFP2Matcher<V>> table = {
|
||||||
|
|
||||||
|
#define INST(fn, name, bitstring) detail::detail<VFP2Matcher, u32, 32>::GetMatcher<decltype(fn)>(fn, name, bitstring)
|
||||||
|
|
||||||
|
// cccc1110________----101-__-0----
|
||||||
|
|
||||||
|
// Floating-point three-register data processing instructions
|
||||||
|
// VMLA
|
||||||
|
// VMLS
|
||||||
|
// VNMLA
|
||||||
|
// VNMLS
|
||||||
|
// VNMUL
|
||||||
|
// VMUL
|
||||||
|
INST(&V::vfp2_VADD, "VADD", "cccc11100D11nnnndddd101zN0M0mmmm"),
|
||||||
|
// VSUB
|
||||||
|
// VDIV
|
||||||
|
|
||||||
|
// Floating-point other instructions
|
||||||
|
// VMOV_imm
|
||||||
|
// VMOV_reg
|
||||||
|
// VABS
|
||||||
|
// VNEG
|
||||||
|
// VSQRT
|
||||||
|
// VCMP
|
||||||
|
// VCMPE
|
||||||
|
// VCVT
|
||||||
|
// VCVTR
|
||||||
|
|
||||||
|
// Extension register load-store instructions
|
||||||
|
// VSTR
|
||||||
|
// VSTM
|
||||||
|
// VSTMDB
|
||||||
|
// VPUSH
|
||||||
|
// VLDR
|
||||||
|
// VLDM
|
||||||
|
// VLDMDB
|
||||||
|
// VPOP
|
||||||
|
|
||||||
|
#undef INST
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto matches_instruction = [instruction](const auto& matcher){ return matcher.Matches(instruction); };
|
||||||
|
|
||||||
|
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
|
||||||
|
return iter != table.end() ? boost::make_optional<const VFP2Matcher<V>&>(*iter) : boost::none;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Arm
|
||||||
|
} // namespace Dynarmic
|
|
@ -11,6 +11,7 @@
|
||||||
#include "common/string_util.h"
|
#include "common/string_util.h"
|
||||||
#include "frontend/arm_types.h"
|
#include "frontend/arm_types.h"
|
||||||
#include "frontend/decoder/arm.h"
|
#include "frontend/decoder/arm.h"
|
||||||
|
#include "frontend/decoder/vfp2.h"
|
||||||
|
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
namespace Arm {
|
namespace Arm {
|
||||||
|
@ -81,6 +82,16 @@ public:
|
||||||
return "<internal error>";
|
return "<internal error>";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Formats the name of a floating-point register from its split instruction encoding.
// @param dp_operation true for a double-precision ("d") register, false for single ("s").
// @param base         the 4-bit register field (Vd/Vn/Vm at the call sites in this class).
// @param bit          the extra register bit (D/N/M at the call sites in this class).
// @returns the register name, e.g. 'd' or 's' followed by the decoded register number.
std::string FPRegStr(bool dp_operation, size_t base, bool bit) {
|
||||||
|
size_t reg_num;
|
||||||
|
if (dp_operation) {
|
||||||
|
// Double precision: the extra bit is the most significant bit of the number.
reg_num = base + (bit ? 16 : 0);
|
||||||
|
} else {
|
||||||
|
// Single precision: the extra bit is the least significant bit of the number.
reg_num = (base << 1) + (bit ? 1 : 0);
|
||||||
|
}
|
||||||
|
return Common::StringFromFormat("%c%zu", dp_operation ? 'd' : 's', reg_num);
|
||||||
|
}
|
||||||
|
|
||||||
// Branch instructions
|
// Branch instructions
|
||||||
std::string arm_B(Cond cond, Imm24 imm24) {
|
std::string arm_B(Cond cond, Imm24 imm24) {
|
||||||
s32 offset = Common::SignExtend<26, s32>(imm24 << 2) + 8;
|
s32 offset = Common::SignExtend<26, s32>(imm24 << 2) + 8;
|
||||||
|
@ -497,12 +508,22 @@ public:
|
||||||
std::string arm_RFE() { return "ice"; }
|
std::string arm_RFE() { return "ice"; }
|
||||||
std::string arm_SETEND(bool E) { return "ice"; }
|
std::string arm_SETEND(bool E) { return "ice"; }
|
||||||
std::string arm_SRS() { return "ice"; }
|
std::string arm_SRS() { return "ice"; }
|
||||||
|
|
||||||
|
// Floating point arithmetic instructions
|
||||||
|
// Disassembles VADD as "vadd<cond>.<f64|f32> <Rd>, <Rn>, <Rm>".
// sz selects f64 (true) or f32 (false); each register name is built by FPRegStr
// from its 4-bit field (Vd/Vn/Vm) and its extra bit (D/N/M).
std::string vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
|
||||||
|
return Common::StringFromFormat("vadd%s.%s %s, %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str());
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string DisassembleArm(u32 instruction) {
|
std::string DisassembleArm(u32 instruction) {
|
||||||
DisassemblerVisitor visitor;
|
DisassemblerVisitor visitor;
|
||||||
auto decoder = DecodeArm<DisassemblerVisitor>(instruction);
|
if (auto vfp_decoder = DecodeVFP2<DisassemblerVisitor>(instruction)) {
|
||||||
return !decoder ? Common::StringFromFormat("UNKNOWN: %x", instruction) : decoder->call(visitor, instruction);
|
return vfp_decoder->call(visitor, instruction);
|
||||||
|
} else if (auto decoder = DecodeArm<DisassemblerVisitor>(instruction)) {
|
||||||
|
return decoder->call(visitor, instruction);
|
||||||
|
} else {
|
||||||
|
return Common::StringFromFormat("UNKNOWN: %x", instruction);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Arm
|
} // namespace Arm
|
||||||
|
|
|
@ -282,6 +282,16 @@ IR::Value IREmitter::ByteReverseDual(const IR::Value& a) {
|
||||||
return Inst(IR::Opcode::ByteReverseDual, {a});
|
return Inst(IR::Opcode::ByteReverseDual, {a});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends an FPAdd32 IR instruction with operands a and b and returns its value.
// fpscr_controlled must be true; the assertion rejects any other value.
IR::Value IREmitter::FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
|
||||||
|
ASSERT(fpscr_controlled);
|
||||||
|
return Inst(IR::Opcode::FPAdd32, {a, b});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends an FPAdd64 IR instruction with operands a and b and returns its value.
// fpscr_controlled must be true; the assertion rejects any other value.
IR::Value IREmitter::FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
|
||||||
|
ASSERT(fpscr_controlled);
|
||||||
|
return Inst(IR::Opcode::FPAdd64, {a, b});
|
||||||
|
}
|
||||||
|
|
||||||
IR::Value IREmitter::ReadMemory8(const IR::Value& vaddr) {
|
IR::Value IREmitter::ReadMemory8(const IR::Value& vaddr) {
|
||||||
return Inst(IR::Opcode::ReadMemory8, {vaddr});
|
return Inst(IR::Opcode::ReadMemory8, {vaddr});
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,9 @@ public:
|
||||||
IR::Value ByteReverseHalf(const IR::Value& a);
|
IR::Value ByteReverseHalf(const IR::Value& a);
|
||||||
IR::Value ByteReverseDual(const IR::Value& a);
|
IR::Value ByteReverseDual(const IR::Value& a);
|
||||||
|
|
||||||
|
IR::Value FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
|
IR::Value FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
|
|
||||||
IR::Value ReadMemory8(const IR::Value& vaddr);
|
IR::Value ReadMemory8(const IR::Value& vaddr);
|
||||||
IR::Value ReadMemory16(const IR::Value& vaddr);
|
IR::Value ReadMemory16(const IR::Value& vaddr);
|
||||||
IR::Value ReadMemory32(const IR::Value& vaddr);
|
IR::Value ReadMemory32(const IR::Value& vaddr);
|
||||||
|
|
|
@ -58,6 +58,10 @@ OPCODE(ByteReverseWord, T::U32, T::U32
|
||||||
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
||||||
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
||||||
|
|
||||||
|
// Floating-point
|
||||||
|
OPCODE(FPAdd32, T::F32, T::F32, T::F32 )
|
||||||
|
OPCODE(FPAdd64, T::F64, T::F64, T::F64 )
|
||||||
|
|
||||||
// Memory access
|
// Memory access
|
||||||
OPCODE(ReadMemory8, T::U8, T::U32 )
|
OPCODE(ReadMemory8, T::U8, T::U32 )
|
||||||
OPCODE(ReadMemory16, T::U16, T::U32 )
|
OPCODE(ReadMemory16, T::U16, T::U32 )
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "frontend/arm_types.h"
|
#include "frontend/arm_types.h"
|
||||||
#include "frontend/decoder/arm.h"
|
#include "frontend/decoder/arm.h"
|
||||||
|
#include "frontend/decoder/vfp2.h"
|
||||||
#include "frontend/ir/ir.h"
|
#include "frontend/ir/ir.h"
|
||||||
#include "frontend/translate/translate.h"
|
#include "frontend/translate/translate.h"
|
||||||
#include "frontend/translate/translate_arm/translate_arm.h"
|
#include "frontend/translate/translate_arm/translate_arm.h"
|
||||||
|
@ -22,8 +23,9 @@ IR::Block TranslateArm(LocationDescriptor descriptor, MemoryRead32FuncType memor
|
||||||
const u32 arm_pc = visitor.ir.current_location.PC();
|
const u32 arm_pc = visitor.ir.current_location.PC();
|
||||||
const u32 arm_instruction = (*memory_read_32)(arm_pc);
|
const u32 arm_instruction = (*memory_read_32)(arm_pc);
|
||||||
|
|
||||||
const auto decoder = DecodeArm<ArmTranslatorVisitor>(arm_instruction);
|
if (auto vfp_decoder = DecodeVFP2<ArmTranslatorVisitor>(arm_instruction)) {
|
||||||
if (decoder) {
|
should_continue = vfp_decoder->call(visitor, arm_instruction);
|
||||||
|
} else if (auto decoder = DecodeArm<ArmTranslatorVisitor>(arm_instruction)) {
|
||||||
should_continue = decoder->call(visitor, arm_instruction);
|
should_continue = decoder->call(visitor, arm_instruction);
|
||||||
} else {
|
} else {
|
||||||
should_continue = visitor.arm_UDF();
|
should_continue = visitor.arm_UDF();
|
||||||
|
|
|
@ -317,6 +317,9 @@ struct ArmTranslatorVisitor final {
|
||||||
bool arm_RFE() { return InterpretThisInstruction(); }
|
bool arm_RFE() { return InterpretThisInstruction(); }
|
||||||
bool arm_SETEND(bool E) { return InterpretThisInstruction(); }
|
bool arm_SETEND(bool E) { return InterpretThisInstruction(); }
|
||||||
bool arm_SRS() { return InterpretThisInstruction(); }
|
bool arm_SRS() { return InterpretThisInstruction(); }
|
||||||
|
|
||||||
|
// Floating-point three-register data processing instructions
|
||||||
|
bool vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Arm
|
} // namespace Arm
|
||||||
|
|
38
src/frontend/translate/translate_arm/vfp2.cpp
Normal file
38
src/frontend/translate/translate_arm/vfp2.cpp
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2016 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "translate_arm.h"
|
||||||
|
|
||||||
|
namespace Dynarmic {
|
||||||
|
namespace Arm {
|
||||||
|
|
||||||
|
// Converts a split register encoding into an ExtReg enumerator.
// @param sz   true selects the D registers (offset from ExtReg::D0), false the S registers
//             (offset from ExtReg::S0).
// @param base the 4-bit register field from the instruction.
// @param bit  the extra register bit from the instruction.
// Assumes the ExtReg enumerators for S0.. and D0.. are contiguous — TODO confirm.
static ExtReg ToExtReg(bool sz, size_t base, bool bit) {
|
||||||
|
if (sz) {
|
||||||
|
// Double precision: the extra bit is the most significant bit of the index.
return static_cast<ExtReg>(static_cast<size_t>(ExtReg::D0) + base + (bit ? 16 : 0));
|
||||||
|
} else {
|
||||||
|
// Single precision: the extra bit is the least significant bit of the index.
return static_cast<ExtReg>(static_cast<size_t>(ExtReg::S0) + (base << 1) + (bit ? 1 : 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Translates VADD.{F32,F64} into IR.
// sz selects double (true) or single (false) precision; Vd/Vn/Vm together with
// D/N/M are decoded into extended registers by ToExtReg. When the condition
// passes, the two source registers are read, added with FPAdd64 or FPAdd32
// (fpscr_controlled = true), and the sum is written to the destination.
// Always returns true.
bool ArmTranslatorVisitor::vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
|
||||||
|
// TODO: if (FPSCR.len || FPSCR.stride) return InterpretThisInstruction();
|
||||||
|
ExtReg d = ToExtReg(sz, Vd, D);
|
||||||
|
ExtReg n = ToExtReg(sz, Vn, N);
|
||||||
|
ExtReg m = ToExtReg(sz, Vm, M);
|
||||||
|
// VADD.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m>
|
||||||
|
if (ConditionPassed(cond)) {
|
||||||
|
auto a = ir.GetExtendedRegister(n);
|
||||||
|
auto b = ir.GetExtendedRegister(m);
|
||||||
|
auto result = sz
|
||||||
|
? ir.FPAdd64(a, b, true)
|
||||||
|
: ir.FPAdd32(a, b, true);
|
||||||
|
ir.SetExtendedRegister(d, result);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Arm
|
||||||
|
} // namespace Dynarmic
|
|
@ -174,15 +174,13 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool DoesBehaviorMatch(const ARMul_State& interp, const Dynarmic::Jit& jit, const std::vector<WriteRecord>& interp_write_records, const std::vector<WriteRecord>& jit_write_records) {
|
static bool DoesBehaviorMatch(const ARMul_State& interp, const Dynarmic::Jit& jit, const std::vector<WriteRecord>& interp_write_records, const std::vector<WriteRecord>& jit_write_records) {
|
||||||
const auto interp_regs = interp.Reg;
|
return interp.Reg == jit.Regs()
|
||||||
const auto jit_regs = jit.Regs();
|
&& interp.ExtReg == jit.ExtRegs()
|
||||||
|
|
||||||
return std::equal(interp_regs.begin(), interp_regs.end(), jit_regs.begin(), jit_regs.end())
|
|
||||||
&& interp.Cpsr == jit.Cpsr()
|
&& interp.Cpsr == jit.Cpsr()
|
||||||
|
&& interp.VFP[VFP_FPSCR] == jit.Fpscr()
|
||||||
&& interp_write_records == jit_write_records;
|
&& interp_write_records == jit_write_records;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_execute_count, const size_t run_count, const std::function<u32()> instruction_generator) {
|
void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_execute_count, const size_t run_count, const std::function<u32()> instruction_generator) {
|
||||||
// Prepare memory
|
// Prepare memory
|
||||||
code_mem.fill(0xEAFFFFFE); // b +#0
|
code_mem.fill(0xEAFFFFFE); // b +#0
|
||||||
|
@ -199,14 +197,25 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe
|
||||||
|
|
||||||
// Setup initial state
|
// Setup initial state
|
||||||
|
|
||||||
|
u32 initial_cpsr = 0x000001D0;
|
||||||
|
|
||||||
std::array<u32, 16> initial_regs;
|
std::array<u32, 16> initial_regs;
|
||||||
std::generate_n(initial_regs.begin(), 15, []{ return RandInt<u32>(0, 0xFFFFFFFF); });
|
std::generate_n(initial_regs.begin(), 15, []{ return RandInt<u32>(0, 0xFFFFFFFF); });
|
||||||
initial_regs[15] = 0;
|
initial_regs[15] = 0;
|
||||||
|
|
||||||
interp.Cpsr = 0x000001D0;
|
std::array<u32, 64> initial_extregs;
|
||||||
|
std::generate_n(initial_extregs.begin(), 64, []{ return RandInt<u32>(0, 0xFFFFFFFF); });
|
||||||
|
|
||||||
|
u32 initial_fpscr = RandInt<u32>(0x0, 0x1) << 24;
|
||||||
|
|
||||||
|
interp.Cpsr = initial_cpsr;
|
||||||
interp.Reg = initial_regs;
|
interp.Reg = initial_regs;
|
||||||
jit.Cpsr() = 0x000001D0;
|
interp.ExtReg = initial_extregs;
|
||||||
|
interp.VFP[VFP_FPSCR] = initial_fpscr;
|
||||||
|
jit.Cpsr() = initial_cpsr;
|
||||||
jit.Regs() = initial_regs;
|
jit.Regs() = initial_regs;
|
||||||
|
jit.ExtRegs() = initial_extregs;
|
||||||
|
jit.SetFpscr(initial_fpscr);
|
||||||
|
|
||||||
std::generate_n(code_mem.begin(), instruction_count, instruction_generator);
|
std::generate_n(code_mem.begin(), instruction_count, instruction_generator);
|
||||||
|
|
||||||
|
@ -239,6 +248,11 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe
|
||||||
auto reg = Dynarmic::Arm::RegToString(static_cast<Dynarmic::Arm::Reg>(i));
|
auto reg = Dynarmic::Arm::RegToString(static_cast<Dynarmic::Arm::Reg>(i));
|
||||||
printf("%4s: %08x\n", reg, initial_regs[i]);
|
printf("%4s: %08x\n", reg, initial_regs[i]);
|
||||||
}
|
}
|
||||||
|
printf("CPSR: %08x\n", initial_cpsr);
|
||||||
|
printf("FPSCR:%08x\n", initial_fpscr);
|
||||||
|
for (int i = 0; i <= 63; i++) {
|
||||||
|
printf("S%3i: %08x\n", i, initial_extregs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
printf("\nFinal Register Listing: \n");
|
printf("\nFinal Register Listing: \n");
|
||||||
printf(" interp jit\n");
|
printf(" interp jit\n");
|
||||||
|
@ -247,6 +261,10 @@ void FuzzJitArm(const size_t instruction_count, const size_t instructions_to_exe
|
||||||
printf("%4s: %08x %08x %s\n", reg, interp.Reg[i], jit.Regs()[i], interp.Reg[i] != jit.Regs()[i] ? "*" : "");
|
printf("%4s: %08x %08x %s\n", reg, interp.Reg[i], jit.Regs()[i], interp.Reg[i] != jit.Regs()[i] ? "*" : "");
|
||||||
}
|
}
|
||||||
printf("CPSR: %08x %08x %s\n", interp.Cpsr, jit.Cpsr(), interp.Cpsr != jit.Cpsr() ? "*" : "");
|
printf("CPSR: %08x %08x %s\n", interp.Cpsr, jit.Cpsr(), interp.Cpsr != jit.Cpsr() ? "*" : "");
|
||||||
|
printf("FPSCR:%08x %08x %s\n", interp.VFP[VFP_FPSCR], jit.Fpscr(), interp.VFP[VFP_FPSCR] != jit.Fpscr() ? "*" : "");
|
||||||
|
for (int i = 0; i <= 63; i++) {
|
||||||
|
printf("S%3i: %08x %08x %s\n", i, interp.ExtReg[i], jit.ExtRegs()[i], interp.ExtReg[i] != jit.ExtRegs()[i] ? "*" : "");
|
||||||
|
}
|
||||||
|
|
||||||
printf("\nInterp Write Records:\n");
|
printf("\nInterp Write Records:\n");
|
||||||
for (auto& record : interp_write_records) {
|
for (auto& record : interp_write_records) {
|
||||||
|
|
Loading…
Reference in a new issue