From 3f6ecfe245751af2178748f91930f47d72026239 Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Sat, 17 Dec 2016 14:52:22 -0500
Subject: [PATCH] Implemented USAD8 and USADA8

---
Reviewer notes (this region is not part of the commit message):

 * What the patch does: adds an IR opcode PackedAbsDiffSumS8 (U32, U32 -> U32),
   lowers it on x64 through EmitPackedOperation with psadbw, and uses it to
   translate ARM USAD8 (Rd = opcode(Rn, Rm)) and USADA8 (Rd = Ra + opcode(Rn, Rm),
   via AddWithCarry with a carry-in of 0, keeping only .result). The two
   header stubs that fell back to InterpretThisInstruction() are replaced by
   declarations, and a fuzz test covering both encodings is added.
 * NOTE(review): the opcode carries an "S8" suffix, yet the header groups
   these instructions under "Unsigned sum of absolute difference functions"
   and psadbw works on unsigned bytes. The name looks misleading; consider
   "U8" in a follow-up. It is left unchanged here so every reference in this
   patch stays consistent.
 * NOTE(review): psadbw sums over a whole 64-bit lane. This presumably relies
   on EmitPackedOperation presenting both 32-bit operands with the remaining
   lane bytes equal (e.g. zero) - confirm against that helper.
 * NOTE(review): arm_USADA8 does not reject a == Reg::PC. The USAD8 bit pattern
   in the test has 1111 in that field, so a == 15 presumably decodes as USAD8
   and never reaches arm_USADA8 - confirm against the decoder table.
 * NOTE(review): RandInt(0, differences_instructions.size() - 1) mixes an int
   literal with a size_t. If RandInt is a single-type template this needs an
   explicit <size_t> - confirm against its declaration.
 * NOTE(review): the From: header carries no e-mail address, which git am may
   refuse. Context-line whitespace (e.g. column alignment in opcodes.inc) has
   not been checked against the tree; use `git apply --ignore-whitespace` if a
   hunk fails to match.

 src/backend_x64/emit_x64.cpp                  |  4 ++++
 src/frontend/ir/ir_emitter.cpp                |  5 +++-
 src/frontend/ir/ir_emitter.h                  |  1 +
 src/frontend/ir/opcodes.inc                   |  1 +
 .../translate/translate_arm/parallel.cpp      | 21 +++++++++++++++++
 .../translate/translate_arm/translate_arm.h   | 10 ++------
 tests/arm/fuzz_arm.cpp                        | 23 +++++++++++++++++++
 7 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index af2c2138..9611685a 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -1672,6 +1672,10 @@ void EmitX64::EmitPackedSaturatedSubS16(IR::Block&, IR::Inst* inst) {
     EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
 }
 
+void EmitX64::EmitPackedAbsDiffSumS8(IR::Block&, IR::Inst* inst) {
+    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psadbw);
+}
+
 static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     using namespace Xbyak::util;
     Xbyak::Label end;
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 58fe16ee..61bc56ca 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -408,6 +408,10 @@ Value IREmitter::PackedSaturatedSubS16(const Value& a, const Value& b) {
     return Inst(Opcode::PackedSaturatedSubS16, {a, b});
 }
 
+Value IREmitter::PackedAbsDiffSumS8(const Value& a, const Value& b) {
+    return Inst(Opcode::PackedAbsDiffSumS8, {a, b});
+}
+
 Value IREmitter::TransferToFP32(const Value& a) {
     return Inst(Opcode::TransferToFP32, {a});
 }
@@ -654,4 +658,3 @@ Value IREmitter::Inst(Opcode op, std::initializer_list<Value> args) {
 
 } // namespace IR
 } // namespace Dynarmic
-
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 40a740d1..d68d51b6 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -153,6 +153,7 @@ public:
     Value PackedSaturatedAddS16(const Value& a, const Value& b);
     Value PackedSaturatedSubU16(const Value& a, const Value& b);
     Value PackedSaturatedSubS16(const Value& a, const Value& b);
+    Value PackedAbsDiffSumS8(const Value& a, const Value& b);
 
     Value TransferToFP32(const Value& a);
     Value TransferToFP64(const Value& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index a563d621..251bb19c 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -95,6 +95,7 @@ OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32
 OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
 OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
 OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
+OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
 
 // Floating-point operations
 OPCODE(TransferToFP32, T::F32, T::U32 )
diff --git a/src/frontend/translate/translate_arm/parallel.cpp b/src/frontend/translate/translate_arm/parallel.cpp
index 5f374a30..50145e7e 100644
--- a/src/frontend/translate/translate_arm/parallel.cpp
+++ b/src/frontend/translate/translate_arm/parallel.cpp
@@ -66,6 +66,27 @@ bool ArmTranslatorVisitor::arm_USAX(Cond cond, Reg n, Reg d, Reg m) {
     return InterpretThisInstruction();
 }
 
+bool ArmTranslatorVisitor::arm_USAD8(Cond cond, Reg d, Reg m, Reg n) {
+    if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
+        return UnpredictableInstruction();
+    if (ConditionPassed(cond)) {
+        auto result = ir.PackedAbsDiffSumS8(ir.GetRegister(n), ir.GetRegister(m));
+        ir.SetRegister(d, result);
+    }
+    return true;
+}
+
+bool ArmTranslatorVisitor::arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n) {
+    if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
+        return UnpredictableInstruction();
+    if (ConditionPassed(cond)) {
+        auto tmp = ir.PackedAbsDiffSumS8(ir.GetRegister(n), ir.GetRegister(m));
+        auto result = ir.AddWithCarry(ir.GetRegister(a), tmp, ir.Imm1(0));
+        ir.SetRegister(d, result.result);
+    }
+    return true;
+}
+
 bool ArmTranslatorVisitor::arm_USUB8(Cond cond, Reg n, Reg d, Reg m) {
     if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
         return UnpredictableInstruction();
diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h
index 997a20b8..973f09dd 100644
--- a/src/frontend/translate/translate_arm/translate_arm.h
+++ b/src/frontend/translate/translate_arm/translate_arm.h
@@ -214,14 +214,8 @@ struct ArmTranslatorVisitor final {
     bool arm_SEL(Cond cond, Reg n, Reg d, Reg m);
 
     // Unsigned sum of absolute difference functions
-    bool arm_USAD8(Cond cond, Reg d, Reg m, Reg n) {
-        UNUSED(cond, d, m, n);
-        return InterpretThisInstruction();
-    }
-    bool arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n) {
-        UNUSED(cond, d, a, m, n);
-        return InterpretThisInstruction();
-    }
+    bool arm_USAD8(Cond cond, Reg d, Reg m, Reg n);
+    bool arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n);
 
     // Packing instructions
     bool arm_PKHBT(Cond cond, Reg n, Reg d, Imm5 imm5, Reg m);
diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp
index 16fbd140..819ccb8b 100644
--- a/tests/arm/fuzz_arm.cpp
+++ b/tests/arm/fuzz_arm.cpp
@@ -961,6 +961,29 @@ TEST_CASE("Fuzz ARM parallel instructions", "[JitX64]") {
     }
 }
 
+TEST_CASE("Fuzz ARM sum of absolute differences", "[JitX64]") {
+    auto validate_d_m_n = [](u32 inst) -> bool {
+        return Bits<16, 19>(inst) != 15 &&
+               Bits<8, 11>(inst) != 15 &&
+               Bits<0, 3>(inst) != 15;
+    };
+    auto validate_d_a_m_n = [&](u32 inst) -> bool {
+        return validate_d_m_n(inst) &&
+               Bits<12, 15>(inst) != 15;
+    };
+
+    const std::array<InstructionGenerator, 2> differences_instructions = {{
+        InstructionGenerator("cccc01111000dddd1111mmmm0001nnnn", validate_d_m_n), // USAD8
+        InstructionGenerator("cccc01111000ddddaaaammmm0001nnnn", validate_d_a_m_n), // USADA8
+    }};
+
+    SECTION("Sum of Absolute Differences (Differences)") {
+        FuzzJitArm(1, 1, 10000, [&differences_instructions]() -> u32 {
+            return differences_instructions[RandInt(0, differences_instructions.size() - 1)].Generate();
+        });
+    }
+}
+
 TEST_CASE( "SMUAD", "[JitX64]" ) {
     Dynarmic::Jit jit{GetUserCallbacks()};
     code_mem.fill({});