VFP: Implement VSQRT
This commit is contained in:
parent
cd8e7c0504
commit
0f412247ed
8 changed files with 88 additions and 11 deletions
|
@ -1090,7 +1090,7 @@ static void DefaultNaN64(XEmitter* code, Routines* routines, X64Reg xmm_value) {
|
|||
code->SetJumpTarget(fixup);
|
||||
}
|
||||
|
||||
static void FPOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
|
||||
static void FPThreeOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
|
||||
IR::Value a = inst->GetArg(0);
|
||||
IR::Value b = inst->GetArg(1);
|
||||
|
||||
|
@ -1111,7 +1111,7 @@ static void FPOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::
|
|||
}
|
||||
}
|
||||
|
||||
static void FPOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
|
||||
static void FPThreeOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
|
||||
IR::Value a = inst->GetArg(0);
|
||||
IR::Value b = inst->GetArg(1);
|
||||
|
||||
|
@ -1132,6 +1132,42 @@ static void FPOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits a single-precision floating-point operation that has one input and one output.
///
/// The register holding argument 0 of `inst` is reused as the destination, so `fn` is
/// invoked as `fn(reg, reg)`. Around that instruction:
///  - if the block's FPSCR FTZ flag is set, DenormalsAreZero32 runs on the input and
///    FlushToZero32 runs on the output;
///  - if the block's FPSCR DN flag is set, DefaultNaN32 runs on the output.
///
/// @param fn  XEmitter member that emits the actual SSE instruction (e.g. SQRTSS).
static void FPTwoOp32(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
    IR::Value operand = inst->GetArg(0);

    // One XMM register serves as both source and destination of the operation.
    X64Reg xmm_value = reg_alloc.UseDefRegister(operand, inst, any_xmm);
    // Scratch GPR handed to the denormal-handling helpers.
    X64Reg gpr_tmp = reg_alloc.ScratchRegister(any_gpr);

    const bool flush_denormals = block.location.FPSCR_FTZ();

    if (flush_denormals) {
        DenormalsAreZero32(code, xmm_value, gpr_tmp);
    }

    (code->*fn)(xmm_value, R(xmm_value));

    if (flush_denormals) {
        FlushToZero32(code, xmm_value, gpr_tmp);
    }

    if (block.location.FPSCR_DN()) {
        DefaultNaN32(code, routines, xmm_value);
    }
}
|
||||
|
||||
/// Emits a double-precision floating-point operation that has one input and one output.
///
/// The register holding argument 0 of `inst` is reused as the destination, so `fn` is
/// invoked as `fn(reg, reg)`. Around that instruction:
///  - if the block's FPSCR FTZ flag is set, DenormalsAreZero64 runs on the input and
///    FlushToZero64 runs on the output;
///  - if the block's FPSCR DN flag is set, DefaultNaN64 runs on the output.
///
/// @param fn  XEmitter member that emits the actual SSE instruction (e.g. SQRTSD).
static void FPTwoOp64(XEmitter* code, Routines* routines, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
    IR::Value operand = inst->GetArg(0);

    // One XMM register serves as both source and destination of the operation.
    X64Reg xmm_value = reg_alloc.UseDefRegister(operand, inst, any_xmm);
    // Scratch GPR handed to the denormal-handling helpers.
    X64Reg gpr_tmp = reg_alloc.ScratchRegister(any_gpr);

    const bool flush_denormals = block.location.FPSCR_FTZ();

    if (flush_denormals) {
        DenormalsAreZero64(code, routines, xmm_value, gpr_tmp);
    }

    (code->*fn)(xmm_value, R(xmm_value));

    if (flush_denormals) {
        FlushToZero64(code, routines, xmm_value, gpr_tmp);
    }

    if (block.location.FPSCR_DN()) {
        DefaultNaN64(code, routines, xmm_value);
    }
}
|
||||
|
||||
void EmitX64::EmitFPAbs32(IR::Block&, IR::Inst* inst) {
|
||||
IR::Value a = inst->GetArg(0);
|
||||
|
||||
|
@ -1165,35 +1201,43 @@ void EmitX64::EmitFPNeg64(IR::Block&, IR::Inst* inst) {
|
|||
}
|
||||
|
||||
/// Emits a single-precision floating-point add (ADDSS) for `inst`.
/// Delegates to FPThreeOp32, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::ADDSS);
}
|
||||
|
||||
/// Emits a double-precision floating-point add (ADDSD) for `inst`.
/// Delegates to FPThreeOp64, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::ADDSD);
}
|
||||
|
||||
/// Emits a single-precision floating-point divide (DIVSS) for `inst`.
/// Delegates to FPThreeOp32, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPDiv32(IR::Block& block, IR::Inst* inst) {
    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::DIVSS);
}
|
||||
|
||||
/// Emits a double-precision floating-point divide (DIVSD) for `inst`.
/// Delegates to FPThreeOp64, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPDiv64(IR::Block& block, IR::Inst* inst) {
    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::DIVSD);
}
|
||||
|
||||
/// Emits a single-precision floating-point multiply (MULSS) for `inst`.
/// Delegates to FPThreeOp32, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPMul32(IR::Block& block, IR::Inst* inst) {
    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::MULSS);
}
|
||||
|
||||
/// Emits a double-precision floating-point multiply (MULSD) for `inst`.
/// Delegates to FPThreeOp64, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPMul64(IR::Block& block, IR::Inst* inst) {
    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::MULSD);
}
|
||||
|
||||
/// Emits a single-precision floating-point square root for `inst`.
/// Uses FPTwoOp32, the shared helper for operations reading one IR argument.
void EmitX64::EmitFPSqrt32(IR::Block& block, IR::Inst* inst) {
    // SQRTSS is the scalar single-precision SSE square-root instruction.
    void (XEmitter::*const sqrt_op)(X64Reg, const OpArg&) = &XEmitter::SQRTSS;
    FPTwoOp32(code, routines, reg_alloc, block, inst, sqrt_op);
}
|
||||
|
||||
/// Emits a double-precision floating-point square root for `inst`.
/// Uses FPTwoOp64, the shared helper for operations reading one IR argument.
void EmitX64::EmitFPSqrt64(IR::Block& block, IR::Inst* inst) {
    // SQRTSD is the scalar double-precision SSE2 square-root instruction.
    void (XEmitter::*const sqrt_op)(X64Reg, const OpArg&) = &XEmitter::SQRTSD;
    FPTwoOp64(code, routines, reg_alloc, block, inst, sqrt_op);
}
|
||||
|
||||
/// Emits a single-precision floating-point subtract (SUBSS) for `inst`.
/// Delegates to FPThreeOp32, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPSub32(IR::Block& block, IR::Inst* inst) {
    FPThreeOp32(code, routines, reg_alloc, block, inst, &XEmitter::SUBSS);
}
|
||||
|
||||
/// Emits a double-precision floating-point subtract (SUBSD) for `inst`.
/// Delegates to FPThreeOp64, the shared helper for operations reading two IR arguments.
void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
    FPThreeOp64(code, routines, reg_alloc, block, inst, &XEmitter::SUBSD);
}
|
||||
|
||||
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
|
||||
|
|
|
@ -79,7 +79,7 @@ boost::optional<const VFP2Matcher<V>&> DecodeVFP2(u32 instruction) {
|
|||
// VMOV_reg
|
||||
INST(&V::vfp2_VABS, "VABS", "cccc11101D110000dddd101z11M0mmmm"),
|
||||
INST(&V::vfp2_VNEG, "VNEG", "cccc11101D110001dddd101z01M0mmmm"),
|
||||
// VSQRT
|
||||
INST(&V::vfp2_VSQRT, "VSQRT", "cccc11101D110001dddd101z11M0mmmm"),
|
||||
// VCMP
|
||||
// VCMPE
|
||||
// VCVT
|
||||
|
|
|
@ -603,6 +603,10 @@ public:
|
|||
/// Disassembles VNEG as "vneg<cond>.<f32|f64> <dest>, <src>".
/// `sz` selects the f64 form; (Vd, D) and (Vm, M) name the destination and source registers.
std::string vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
    const char* const precision = sz ? "f64" : "f32";
    const auto dest_reg = FPRegStr(sz, Vd, D);
    const auto src_reg = FPRegStr(sz, Vm, M);
    return Common::StringFromFormat("vneg%s.%s %s, %s", CondToString(cond), precision, dest_reg.c_str(), src_reg.c_str());
}
|
||||
|
||||
/// Disassembles VSQRT as "vsqrt<cond>.<f32|f64> <dest>, <src>".
/// `sz` selects the f64 form; (Vd, D) and (Vm, M) name the destination and source registers.
std::string vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
    const char* const precision = sz ? "f64" : "f32";
    const auto dest_reg = FPRegStr(sz, Vd, D);
    const auto src_reg = FPRegStr(sz, Vm, M);
    return Common::StringFromFormat("vsqrt%s.%s %s, %s", CondToString(cond), precision, dest_reg.c_str(), src_reg.c_str());
}
|
||||
};
|
||||
|
||||
std::string DisassembleArm(u32 instruction) {
|
||||
|
|
|
@ -320,6 +320,13 @@ IR::Value IREmitter::FPNeg64(const IR::Value& a) {
|
|||
return Inst(IR::Opcode::FPNeg64, {a});
|
||||
}
|
||||
|
||||
/// Appends an FPSqrt32 IR instruction taking `a` as its only argument.
/// @return The value produced by the new instruction.
IR::Value IREmitter::FPSqrt32(const IR::Value& a) {
    IR::Value sqrt_value = Inst(IR::Opcode::FPSqrt32, {a});
    return sqrt_value;
}
|
||||
|
||||
/// Appends an FPSqrt64 IR instruction taking `a` as its only argument.
/// @return The value produced by the new instruction.
IR::Value IREmitter::FPSqrt64(const IR::Value& a) {
    IR::Value sqrt_value = Inst(IR::Opcode::FPSqrt64, {a});
    return sqrt_value;
}
|
||||
|
||||
IR::Value IREmitter::FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
|
||||
ASSERT(fpscr_controlled);
|
||||
|
|
|
@ -102,6 +102,8 @@ public:
|
|||
IR::Value FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||
IR::Value FPNeg32(const IR::Value& a);
|
||||
IR::Value FPNeg64(const IR::Value& a);
|
||||
IR::Value FPSqrt32(const IR::Value& a);
|
||||
IR::Value FPSqrt64(const IR::Value& a);
|
||||
IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||
IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||
|
||||
|
|
|
@ -70,6 +70,8 @@ OPCODE(FPMul32, T::F32, T::F32, T::F32
|
|||
OPCODE(FPMul64, T::F64, T::F64, T::F64 )
|
||||
OPCODE(FPNeg32, T::F32, T::F32 )
|
||||
OPCODE(FPNeg64, T::F64, T::F64 )
|
||||
OPCODE(FPSqrt32, T::F32, T::F32 )
|
||||
OPCODE(FPSqrt64, T::F64, T::F64 )
|
||||
OPCODE(FPSub32, T::F32, T::F32, T::F32 )
|
||||
OPCODE(FPSub64, T::F64, T::F64, T::F64 )
|
||||
|
||||
|
|
|
@ -332,6 +332,7 @@ struct ArmTranslatorVisitor final {
|
|||
// Floating-point misc instructions
|
||||
bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
|
||||
bool vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
|
||||
bool vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
|
||||
};
|
||||
|
||||
} // namespace Arm
|
||||
|
|
|
@ -226,5 +226,22 @@ bool ArmTranslatorVisitor::vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Translates VSQRT.{F32,F64} <{S,D}d>, <{S,D}m> into IR.
///
/// Only the scalar form (FPSCR Len == 1 and Stride == 1) is translated; any other
/// configuration is handed to InterpretThisInstruction(). `sz` selects the 64-bit variant.
///
/// @return The result of InterpretThisInstruction() on the fallback path, otherwise true.
bool ArmTranslatorVisitor::vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
    const bool is_scalar = ir.current_location.FPSCR_Len() == 1 && ir.current_location.FPSCR_Stride() == 1;
    if (!is_scalar) {
        return InterpretThisInstruction(); // TODO: Vectorised floating point instructions
    }

    const ExtReg dest = ToExtReg(sz, Vd, D);
    const ExtReg src = ToExtReg(sz, Vm, M);

    // VSQRT.{F32,F64} <{S,D}d>, <{S,D}m>
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto operand = ir.GetExtendedRegister(src);
    auto root = sz ? ir.FPSqrt64(operand) : ir.FPSqrt32(operand);
    ir.SetExtendedRegister(dest, root);
    return true;
}
|
||||
|
||||
} // namespace Arm
|
||||
} // namespace Dynarmic
|
||||
|
|
Loading…
Reference in a new issue