Implement QADD, QSUB, QDADD, QDSUB

This commit is contained in:
MerryMage 2016-12-15 22:33:20 +00:00
parent b178ab3bec
commit 96e46ba6b5
8 changed files with 210 additions and 43 deletions

View file

@ -21,6 +21,7 @@ set(SRCS
frontend/translate/translate_arm/packing.cpp
frontend/translate/translate_arm/parallel.cpp
frontend/translate/translate_arm/reversal.cpp
frontend/translate/translate_arm/saturated.cpp
frontend/translate/translate_arm/status_register_access.cpp
frontend/translate/translate_arm/synchronization.cpp
frontend/translate/translate_arm/vfp2.cpp

View file

@ -1256,6 +1256,77 @@ void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) {
code->bswap(result);
}
// Emits x64 code that counts the leading zero bits of the 32-bit argument of inst.
// Uses lzcnt when cpu_info reports tLZCNT; otherwise falls back to a bsr-based sequence.
void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32();
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
code->lzcnt(result, source);
} else {
// source is overwritten by the mov below; presumably that is why it is
// requested with UseScratchGpr instead of UseGpr (confirm against reg_alloc).
Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32();
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
// The result of a bsr of zero is undefined, but zf is set after it.
code->bsr(result, source);
// For a zero input, replace the undefined bsr result with -1 so that the
// arithmetic below produces 32.
code->mov(source, 0xFFFFFFFF);
code->cmovz(result, source);
// result = 31 - result
code->neg(result);
code->add(result, 31);
}
}
// Emits x64 code for a 32-bit signed saturating add of the two arguments of inst.
// If a GetOverflowFromOp pseudo-operation is associated with inst, its value is
// also produced here (via seto) and the pseudo-operation is erased from block.
void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
Xbyak::Reg32 addend = reg_alloc.UseGpr(b).cvt32();
// This register first holds the saturation value; when nothing consumes the
// overflow flag it is only a scratch register.
Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
// Saturation value derived from the sign bit of a: (a >> 31) + 0x7FFFFFFF.
code->mov(overflow, result);
code->shr(overflow, 31);
code->add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
code->add(result, addend);
// If the add overflowed, replace the wrapped sum with the saturation value.
code->cmovo(result, overflow);
if (overflow_inst) {
EraseInstruction(block, overflow_inst);
inst->DecrementRemainingUses();
// Captures the overflow flag of the add above; this relies on cmovo not
// modifying flags (see the x86 instruction reference).
// NOTE(review): only the low byte is written, so the upper bits still hold
// the saturation value; presumably consumers read just the low byte (confirm).
code->seto(overflow.cvt8());
}
}
// Emits x64 code for a 32-bit signed saturating subtract (a - b) of the two arguments of inst.
// If a GetOverflowFromOp pseudo-operation is associated with inst, its value is
// also produced here (via seto) and the pseudo-operation is erased from block.
void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
Xbyak::Reg32 subend = reg_alloc.UseGpr(b).cvt32();
// This register first holds the saturation value; when nothing consumes the
// overflow flag it is only a scratch register.
Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();
// Saturation value derived from the sign bit of a: (a >> 31) + 0x7FFFFFFF.
code->mov(overflow, result);
code->shr(overflow, 31);
code->add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was non-negative, or 0x80000000 if a was negative
code->sub(result, subend);
// If the subtract overflowed, replace the wrapped difference with the saturation value.
code->cmovo(result, overflow);
if (overflow_inst) {
EraseInstruction(block, overflow_inst);
inst->DecrementRemainingUses();
// Captures the overflow flag of the sub above; this relies on cmovo not
// modifying flags (see the x86 instruction reference).
// NOTE(review): only the low byte is written, so the upper bits still hold
// the saturation value; presumably consumers read just the low byte (confirm).
code->seto(overflow.cvt8());
}
}
void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
@ -1600,27 +1671,6 @@ void EmitX64::EmitPackedSaturatedSubS16(IR::Block&, IR::Inst* inst) {
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
}
// Emits x64 code that counts the leading zero bits of the 32-bit argument of inst.
// Uses lzcnt when cpu_info reports tLZCNT; otherwise falls back to a bsr-based sequence.
void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
if (cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
Xbyak::Reg32 source = reg_alloc.UseGpr(a).cvt32();
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
code->lzcnt(result, source);
} else {
// source is overwritten by the mov below; presumably that is why it is
// requested with UseScratchGpr instead of UseGpr (confirm against reg_alloc).
Xbyak::Reg32 source = reg_alloc.UseScratchGpr(a).cvt32();
Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
// The result of a bsr of zero is undefined, but zf is set after it.
code->bsr(result, source);
// For a zero input, replace the undefined bsr result with -1 so that the
// arithmetic below produces 32.
code->mov(source, 0xFFFFFFFF);
code->cmovz(result, source);
// result = 31 - result
code->neg(result);
code->add(result, 31);
}
}
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;

View file

@ -324,6 +324,22 @@ Value IREmitter::ByteReverseDual(const Value& a) {
return Inst(Opcode::ByteReverseDual, {a});
}
// Builds a CountLeadingZeros IR instruction with a as its only argument and
// returns the value produced by Inst.
Value IREmitter::CountLeadingZeros(const Value& a) {
return Inst(Opcode::CountLeadingZeros, {a});
}
// Builds a SignedSaturatedAdd instruction on (a, b) followed by the
// GetOverflowFromOp pseudo-operation that refers to it, and returns both.
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedAdd(const Value& a, const Value& b) {
    auto sum = Inst(Opcode::SignedSaturatedAdd, {a, b});
    // The overflow query must be created after, and take as argument, the add itself.
    auto sum_overflowed = Inst(Opcode::GetOverflowFromOp, {sum});

    ResultAndOverflow pair{sum, sum_overflowed};
    return pair;
}
// Builds a SignedSaturatedSub instruction on (a, b) followed by the
// GetOverflowFromOp pseudo-operation that refers to it, and returns both.
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedSub(const Value& a, const Value& b) {
    auto difference = Inst(Opcode::SignedSaturatedSub, {a, b});
    // The overflow query must be created after, and take as argument, the subtract itself.
    auto difference_overflowed = Inst(Opcode::GetOverflowFromOp, {difference});

    ResultAndOverflow pair{difference, difference_overflowed};
    return pair;
}
IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
auto result = Inst(Opcode::PackedAddU8, {a, b});
auto ge = Inst(Opcode::GetGEFromOp, {result});
@ -392,10 +408,6 @@ Value IREmitter::PackedSaturatedSubS16(const Value& a, const Value& b) {
return Inst(Opcode::PackedSaturatedSubS16, {a, b});
}
// Builds a CountLeadingZeros IR instruction with a as its only argument and
// returns the value produced by Inst.
Value IREmitter::CountLeadingZeros(const Value& a) {
return Inst(Opcode::CountLeadingZeros, {a});
}
// Builds a TransferToFP32 IR instruction with a as its only argument and
// returns the value produced by Inst.
Value IREmitter::TransferToFP32(const Value& a) {
return Inst(Opcode::TransferToFP32, {a});
}

View file

@ -43,6 +43,11 @@ public:
Value carry;
};
// Pairs the value produced by an operation with the value of its
// GetOverflowFromOp pseudo-operation (returned by the SignedSaturated* emitters).
struct ResultAndOverflow {
Value result; // value of the operation itself
Value overflow; // value of the associated GetOverflowFromOp
};
struct ResultAndCarryAndOverflow {
Value result;
Value carry;
@ -127,6 +132,11 @@ public:
Value ByteReverseWord(const Value& a);
Value ByteReverseHalf(const Value& a);
Value ByteReverseDual(const Value& a);
Value CountLeadingZeros(const Value& a);
// Signed saturating add / subtract of a and b. Each returns the operation's
// result together with the value of its GetOverflowFromOp pseudo-operation.
ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
ResultAndGE PackedAddU8(const Value& a, const Value& b);
ResultAndGE PackedSubU8(const Value& a, const Value& b);
Value PackedHalvingAddU8(const Value& a, const Value& b);
@ -143,7 +153,6 @@ public:
Value PackedSaturatedAddS16(const Value& a, const Value& b);
Value PackedSaturatedSubU16(const Value& a, const Value& b);
Value PackedSaturatedSubS16(const Value& a, const Value& b);
Value CountLeadingZeros(const Value& a);
Value TransferToFP32(const Value& a);
Value TransferToFP64(const Value& a);

View file

@ -72,6 +72,13 @@ OPCODE(ZeroExtendByteToWord, T::U32, T::U8
OPCODE(ByteReverseWord, T::U32, T::U32 )
OPCODE(ByteReverseHalf, T::U16, T::U16 )
OPCODE(ByteReverseDual, T::U64, T::U64 )
OPCODE(CountLeadingZeros, T::U32, T::U32 )
// Saturated instructions
// Both produce a T::U32 from two T::U32 operands; the overflow indication is
// obtained separately through a GetOverflowFromOp on the result.
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
// Packed instructions
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
@ -88,7 +95,6 @@ OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
OPCODE(CountLeadingZeros, T::U32, T::U32 )
// Floating-point operations
OPCODE(TransferToFP32, T::F32, T::U32 )

View file

@ -0,0 +1,78 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "translate_arm.h"
namespace Dynarmic {
namespace Arm {
// Translates QADD: writes the signed saturating sum Rm + Rn to Rd and ORs the
// overflow indication into the Q flag. Using R15 for any register is rejected
// through UnpredictableInstruction().
bool ArmTranslatorVisitor::arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
    const bool uses_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    // QADD <Rd>, <Rm>, <Rn>
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto rm_value = ir.GetRegister(m);
    auto rn_value = ir.GetRegister(n);

    auto sum = ir.SignedSaturatedAdd(rm_value, rn_value);
    ir.SetRegister(d, sum.result);
    ir.OrQFlag(sum.overflow);
    return true;
}
// Translates QSUB: writes the signed saturating difference Rm - Rn to Rd and
// ORs the overflow indication into the Q flag. Using R15 for any register is
// rejected through UnpredictableInstruction().
bool ArmTranslatorVisitor::arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
    const bool uses_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    // QSUB <Rd>, <Rm>, <Rn>
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto rm_value = ir.GetRegister(m);
    auto rn_value = ir.GetRegister(n);

    auto difference = ir.SignedSaturatedSub(rm_value, rn_value);
    ir.SetRegister(d, difference.result);
    ir.OrQFlag(difference.overflow);
    return true;
}
// Translates QDADD: doubles Rn with saturation, adds that to Rm with
// saturation, and writes the sum to Rd. The overflow indication of each of the
// two saturating steps is ORed into the Q flag. Using R15 for any register is
// rejected through UnpredictableInstruction().
bool ArmTranslatorVisitor::arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
    const bool uses_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    // QDADD <Rd>, <Rm>, <Rn>
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto rm_value = ir.GetRegister(m);
    auto rn_value = ir.GetRegister(n);

    // Doubling is expressed as a saturating Rn + Rn.
    auto twice_rn = ir.SignedSaturatedAdd(rn_value, rn_value);
    ir.OrQFlag(twice_rn.overflow);

    auto sum = ir.SignedSaturatedAdd(rm_value, twice_rn.result);
    ir.SetRegister(d, sum.result);
    ir.OrQFlag(sum.overflow);
    return true;
}
// Translates QDSUB: doubles Rn with saturation, subtracts that from Rm with
// saturation, and writes the difference to Rd. The overflow indication of each
// of the two saturating steps is ORed into the Q flag. Using R15 for any
// register is rejected through UnpredictableInstruction().
bool ArmTranslatorVisitor::arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
    const bool uses_pc = (d == Reg::PC) || (n == Reg::PC) || (m == Reg::PC);
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    // QDSUB <Rd>, <Rm>, <Rn>
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto rm_value = ir.GetRegister(m);
    auto rn_value = ir.GetRegister(n);

    // Doubling is expressed as a saturating Rn + Rn.
    auto twice_rn = ir.SignedSaturatedAdd(rn_value, rn_value);
    ir.OrQFlag(twice_rn.overflow);

    auto difference = ir.SignedSaturatedSub(rm_value, twice_rn.result);
    ir.SetRegister(d, difference.result);
    ir.OrQFlag(difference.overflow);
    return true;
}
} // namespace Arm
} // namespace Dynarmic

View file

@ -326,22 +326,10 @@ struct ArmTranslatorVisitor final {
bool arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m);
// Saturated Add/Subtract instructions
// QADD is not translated here: the operands are ignored and the result of
// InterpretThisInstruction() is returned.
bool arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
UNUSED(cond, d, m, n);
return InterpretThisInstruction();
}
// QSUB is not translated here: the operands are ignored and the result of
// InterpretThisInstruction() is returned.
bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
UNUSED(cond, d, m, n);
return InterpretThisInstruction();
}
// QDADD is not translated here: the operands are ignored and the result of
// InterpretThisInstruction() is returned.
bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
UNUSED(cond, d, m, n);
return InterpretThisInstruction();
}
// QDSUB is not translated here: the operands are ignored and the result of
// InterpretThisInstruction() is returned.
bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
UNUSED(cond, d, m, n);
return InterpretThisInstruction();
}
// Declarations only; the bodies are defined out of line
// (presumably in translate_arm/saturated.cpp; confirm).
bool arm_QADD(Cond cond, Reg n, Reg d, Reg m);
bool arm_QSUB(Cond cond, Reg n, Reg d, Reg m);
bool arm_QDADD(Cond cond, Reg n, Reg d, Reg m);
bool arm_QDSUB(Cond cond, Reg n, Reg d, Reg m);
// Synchronization Primitive instructions
bool arm_CLREX();

View file

@ -985,6 +985,29 @@ TEST_CASE("Test ARM misc instructions", "[JitX64]") {
}
}
// Fuzzes randomly generated QADD / QSUB / QDADD / QDSUB encodings through FuzzJitArm.
TEST_CASE("Fuzz ARM saturated instructions", "[JitX64]") {
    // R15 as Rd, Rn, or Rm is UNPREDICTABLE, so such encodings are filtered out.
    auto is_valid = [](u32 inst) -> bool {
        const bool rn_is_pc = Bits<16, 19>(inst) == 0b1111;
        const bool rd_is_pc = Bits<12, 15>(inst) == 0b1111;
        const bool rm_is_pc = Bits<0, 3>(inst) == 0b1111;
        return !(rn_is_pc || rd_is_pc || rm_is_pc);
    };

    const std::array<InstructionGenerator, 4> instructions = {{
        InstructionGenerator("cccc00010000nnnndddd00000101mmmm", is_valid), // QADD
        InstructionGenerator("cccc00010010nnnndddd00000101mmmm", is_valid), // QSUB
        InstructionGenerator("cccc00010100nnnndddd00000101mmmm", is_valid), // QDADD
        InstructionGenerator("cccc00010110nnnndddd00000101mmmm", is_valid), // QDSUB
    }};

    SECTION("Saturated") {
        // Picks one of the generators uniformly by index and asks it for an encoding.
        auto random_saturated_instruction = [&instructions]() -> u32 {
            const size_t last_index = instructions.size() - 1;
            const size_t chosen = RandInt<size_t>(0, last_index);
            return instructions[chosen].Generate();
        };
        FuzzJitArm(4, 5, 10000, random_saturated_instruction);
    }
}
TEST_CASE("Fuzz ARM packing instructions", "[JitX64]") {
auto is_pkh_valid = [](u32 inst) -> bool {
// R15 as Rd, Rn, or Rm is UNPREDICTABLE