From 8919265d2c3a3e3cc5c815322b1bd0240c9bafb0 Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Sun, 18 Dec 2016 11:25:41 -0500
Subject: [PATCH] Implement SADD8, SADD16, UADD16, SSUB8, SSUB16, USUB16

---
 src/backend_x64/emit_x64.cpp                  | 278 ++++++++++++++++--
 src/frontend/ir/ir_emitter.cpp                |  44 +++
 src/frontend/ir/ir_emitter.h                  |   8 +
 src/frontend/ir/opcodes.inc                   |   8 +
 .../translate/translate_arm/parallel.cpp      |  62 +++-
 tests/arm/fuzz_arm.cpp                        |  40 ++-
 6 files changed, 398 insertions(+), 42 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 9611685a..4c7d00e8 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -1278,6 +1278,26 @@ void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
     }
 }
 
+void EmitX64::EmitNegateLowWord(IR::Block&, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+
+    Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
+
+    code->ror(result, 16);
+    code->xor_(result, 0xFFFF0000);
+    code->add(result, 0x00010000);
+    code->ror(result, 16);
+}
+
+void EmitX64::EmitNegateHighWord(IR::Block&, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+
+    Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
+
+    code->xor_(result, 0xFFFF0000);
+    code->add(result, 0x00010000);
+}
+
 void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
@@ -1328,6 +1348,25 @@ void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
     }
 }
 
+static void ExtractMostSignificantBitFromPackedBytes(const Xbyak::util::Cpu& cpu_info, BlockOfCode* code, RegAlloc& reg_alloc, Xbyak::Reg32 value, boost::optional<Xbyak::Reg32> a_tmp = boost::none) {
+    if (cpu_info.has(Xbyak::util::Cpu::tBMI2)) {
+        Xbyak::Reg32 tmp = a_tmp ? 
*a_tmp : reg_alloc.ScratchGpr().cvt32(); + code->mov(tmp, 0x80808080); + code->pext(value, value, tmp); + } else { + code->and_(value, 0x80808080); + code->imul(value, value, 0x00204081); + code->shr(value, 28); + } +} + +static void ExtractAndDuplicateMostSignificantBitFromPackedWords(BlockOfCode* code, Xbyak::Reg32 value) { + code->and_(value, 0x80008000); + code->shr(value, 1); + code->imul(value, value, 0xC003); + code->shr(value, 28); +} + void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) { auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -1364,14 +1403,119 @@ void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) { } code->xor_(result, reg_a); if (ge_inst) { - if (cpu_info.has(Xbyak::util::Cpu::tBMI2)) { - code->mov(tmp, 0x80808080); - code->pext(reg_ge, reg_ge, tmp); - } else { - code->and_(reg_ge, 0x80808080); - code->imul(reg_ge, reg_ge, 0x0204081); - code->shr(reg_ge, 28); - } + ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + } +} + +void EmitX64::EmitPackedAddS8(IR::Block& block, IR::Inst* inst) { + auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_ge; + + Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); + Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); + + if (ge_inst) { + EraseInstruction(block, ge_inst); + inst->DecrementRemainingUses(); + + reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); + } + + code->movd(xmm_a, reg_a); + code->movd(xmm_b, reg_b); + if (ge_inst) { + Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm(); + code->movdqa(saturated_sum, xmm_a); + code->paddsb(saturated_sum, xmm_b); + code->movd(reg_ge, saturated_sum); + } + code->paddb(xmm_a, xmm_b); + code->movd(reg_a, xmm_a); + if (ge_inst) { + code->not_(reg_ge); + 
ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + } +} + +void EmitX64::EmitPackedAddU16(IR::Block& block, IR::Inst* inst) { + auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(a).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseScratchGpr(b).cvt32(); + Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32(); + Xbyak::Reg32 reg_ge, tmp; + + if (ge_inst) { + EraseInstruction(block, ge_inst); + inst->DecrementRemainingUses(); + + reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); + + code->mov(reg_ge, reg_a); + code->and_(reg_ge, reg_b); + } + + // SWAR Arithmetic + code->mov(result, reg_a); + code->xor_(result, reg_b); + code->and_(result, 0x80008000); + code->and_(reg_a, 0x7FFF7FFF); + code->and_(reg_b, 0x7FFF7FFF); + code->add(reg_a, reg_b); + if (ge_inst) { + tmp = reg_alloc.ScratchGpr().cvt32(); + code->mov(tmp, result); + code->and_(tmp, reg_a); + code->or_(reg_ge, tmp); + } + code->xor_(result, reg_a); + if (ge_inst) { + ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); + } +} + +void EmitX64::EmitPackedAddS16(IR::Block& block, IR::Inst* inst) { + auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_ge; + + Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); + Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); + + if (ge_inst) { + EraseInstruction(block, ge_inst); + inst->DecrementRemainingUses(); + + reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); + } + + code->movd(xmm_a, reg_a); + code->movd(xmm_b, reg_b); + if (ge_inst) { + Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm(); + code->movdqa(saturated_sum, xmm_a); + code->paddsw(saturated_sum, xmm_b); + code->movd(reg_ge, 
saturated_sum); + } + code->paddw(xmm_a, xmm_b); + code->movd(reg_a, xmm_a); + if (ge_inst) { + code->not_(reg_ge); + ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge); } } @@ -1409,15 +1553,115 @@ void EmitX64::EmitPackedSubU8(IR::Block& block, IR::Inst* inst) { code->movd(reg_a, xmm_a); if (ge_inst) { - if (cpu_info.has(Xbyak::util::Cpu::tBMI2)) { - Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32(); - code->mov(tmp, 0x80808080); - code->pext(reg_ge, reg_ge, tmp); - } else { - code->and_(reg_ge, 0x80808080); - code->imul(reg_ge, reg_ge, 0x0204081); - code->shr(reg_ge, 28); - } + ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + } +} + + +void EmitX64::EmitPackedSubS8(IR::Block& block, IR::Inst* inst) { + auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32(); + Xbyak::Reg32 reg_ge; + + Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm(); + Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm(); + + if (ge_inst) { + EraseInstruction(block, ge_inst); + inst->DecrementRemainingUses(); + reg_ge = reg_alloc.DefGpr(ge_inst).cvt32(); + } + code->movd(xmm_b, reg_b); + code->movd(xmm_a, reg_a); + if (ge_inst) { + Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm(); + code->movdqa(xmm_ge, xmm_a); + code->psubsb(xmm_ge, xmm_b); + code->movd(reg_ge, xmm_ge); + } + code->psubb(xmm_a, xmm_b); + code->movd(reg_a, xmm_a); + if (ge_inst) { + code->not_(reg_ge); + ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge); + } +} + +void EmitX64::EmitPackedSubU16(IR::Block& block, IR::Inst* inst) { + auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); + + IR::Value a = inst->GetArg(0); + IR::Value b = inst->GetArg(1); + + Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32(); + Xbyak::Reg32 reg_b = 
reg_alloc.UseGpr(b).cvt32();
+    Xbyak::Reg32 reg_ge;
+
+    Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm();
+    Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm();
+    Xbyak::Xmm xmm_ge;
+
+    if (ge_inst) {
+        EraseInstruction(block, ge_inst);
+        inst->DecrementRemainingUses();
+
+        reg_ge = reg_alloc.DefGpr(ge_inst).cvt32();
+        xmm_ge = reg_alloc.ScratchXmm();
+    }
+
+    code->movd(xmm_a, reg_a);
+    code->movd(xmm_b, reg_b);
+    if (ge_inst) {
+        code->movdqa(xmm_ge, xmm_a);
+        code->pmaxuw(xmm_ge, xmm_b); // FIXME: PMAXUW is SSE4.1 — needs a cpu_info.has(Xbyak::util::Cpu::tSSE41) guard or a non-SSE4.1 fallback
+        code->pcmpeqw(xmm_ge, xmm_a);
+        code->movd(reg_ge, xmm_ge);
+    }
+    code->psubw(xmm_a, xmm_b);
+    code->movd(reg_a, xmm_a);
+    if (ge_inst) {
+        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge);
+    }
+}
+
+void EmitX64::EmitPackedSubS16(IR::Block& block, IR::Inst* inst) {
+    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
+
+    IR::Value a = inst->GetArg(0);
+    IR::Value b = inst->GetArg(1);
+
+    Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
+    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
+    Xbyak::Reg32 reg_ge;
+
+    Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm();
+    Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm();
+
+    if (ge_inst) {
+        EraseInstruction(block, ge_inst);
+        inst->DecrementRemainingUses();
+
+        reg_ge = reg_alloc.DefGpr(ge_inst).cvt32();
+    }
+
+    code->movd(xmm_b, reg_b);
+    code->movd(xmm_a, reg_a);
+    if (ge_inst) {
+        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
+        code->movdqa(xmm_ge, xmm_a);
+        code->psubsw(xmm_ge, xmm_b);
+        code->movd(reg_ge, xmm_ge);
+    }
+    code->psubw(xmm_a, xmm_b);
+    code->movd(reg_a, xmm_a);
+    if (ge_inst) {
+        code->not_(reg_ge);
+        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge);
     }
 }
 
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 61bc56ca..5db49c7c 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -328,6 +328,14 @@ Value IREmitter::CountLeadingZeros(const Value& a) {
     return Inst(Opcode::CountLeadingZeros, {a});
 }
 
+Value 
IREmitter::NegateLowWord(const Value& a) { + return Inst(Opcode::NegateLowWord, {a}); +} + +Value IREmitter::NegateHighWord(const Value& a) { + return Inst(Opcode::NegateHighWord, {a}); +} + IREmitter::ResultAndOverflow IREmitter::SignedSaturatedAdd(const Value& a, const Value& b) { auto result = Inst(Opcode::SignedSaturatedAdd, {a, b}); auto overflow = Inst(Opcode::GetOverflowFromOp, {result}); @@ -346,12 +354,48 @@ IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) { return {result, ge}; } +IREmitter::ResultAndGE IREmitter::PackedAddS8(const Value& a, const Value& b) { + auto result = Inst(Opcode::PackedAddS8, {a, b}); + auto ge = Inst(Opcode::GetGEFromOp, {result}); + return {result, ge}; +} + +IREmitter::ResultAndGE IREmitter::PackedAddU16(const Value& a, const Value& b) { + auto result = Inst(Opcode::PackedAddU16, {a, b}); + auto ge = Inst(Opcode::GetGEFromOp, {result}); + return {result, ge}; +} + +IREmitter::ResultAndGE IREmitter::PackedAddS16(const Value& a, const Value& b) { + auto result = Inst(Opcode::PackedAddS16, {a, b}); + auto ge = Inst(Opcode::GetGEFromOp, {result}); + return {result, ge}; +} + IREmitter::ResultAndGE IREmitter::PackedSubU8(const Value& a, const Value& b) { auto result = Inst(Opcode::PackedSubU8, {a, b}); auto ge = Inst(Opcode::GetGEFromOp, {result}); return {result, ge}; } +IREmitter::ResultAndGE IREmitter::PackedSubS8(const Value& a, const Value& b) { + auto result = Inst(Opcode::PackedSubS8, {a, b}); + auto ge = Inst(Opcode::GetGEFromOp, {result}); + return {result, ge}; +} + +IREmitter::ResultAndGE IREmitter::PackedSubU16(const Value& a, const Value& b) { + auto result = Inst(Opcode::PackedSubU16, {a, b}); + auto ge = Inst(Opcode::GetGEFromOp, {result}); + return {result, ge}; +} + +IREmitter::ResultAndGE IREmitter::PackedSubS16(const Value& a, const Value& b) { + auto result = Inst(Opcode::PackedSubS16, {a, b}); + auto ge = Inst(Opcode::GetGEFromOp, {result}); + return {result, ge}; +} + Value 
IREmitter::PackedHalvingAddU8(const Value& a, const Value& b) { return Inst(Opcode::PackedHalvingAddU8, {a, b}); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index d68d51b6..015002f0 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -133,12 +133,20 @@ public: Value ByteReverseHalf(const Value& a); Value ByteReverseDual(const Value& a); Value CountLeadingZeros(const Value& a); + Value NegateLowWord(const Value& a); + Value NegateHighWord(const Value& a); ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b); ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b); ResultAndGE PackedAddU8(const Value& a, const Value& b); + ResultAndGE PackedAddS8(const Value& a, const Value& b); + ResultAndGE PackedAddU16(const Value& a, const Value& b); + ResultAndGE PackedAddS16(const Value& a, const Value& b); ResultAndGE PackedSubU8(const Value& a, const Value& b); + ResultAndGE PackedSubS8(const Value& a, const Value& b); + ResultAndGE PackedSubU16(const Value& a, const Value& b); + ResultAndGE PackedSubS16(const Value& a, const Value& b); Value PackedHalvingAddU8(const Value& a, const Value& b); Value PackedHalvingAddS8(const Value& a, const Value& b); Value PackedHalvingSubU8(const Value& a, const Value& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 251bb19c..79d0c247 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -73,6 +73,8 @@ OPCODE(ByteReverseWord, T::U32, T::U32 OPCODE(ByteReverseHalf, T::U16, T::U16 ) OPCODE(ByteReverseDual, T::U64, T::U64 ) OPCODE(CountLeadingZeros, T::U32, T::U32 ) +OPCODE(NegateLowWord, T::U32, T::U32 ) +OPCODE(NegateHighWord, T::U32, T::U32 ) // Saturated instructions OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 ) @@ -80,7 +82,13 @@ OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 // Packed instructions OPCODE(PackedAddU8, T::U32, T::U32, T::U32 ) +OPCODE(PackedAddS8, T::U32, T::U32, T::U32 ) 
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 ) +OPCODE(PackedSubS8, T::U32, T::U32, T::U32 ) +OPCODE(PackedAddU16, T::U32, T::U32, T::U32 ) +OPCODE(PackedAddS16, T::U32, T::U32, T::U32 ) +OPCODE(PackedSubU16, T::U32, T::U32, T::U32 ) +OPCODE(PackedSubS16, T::U32, T::U32, T::U32 ) OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 ) OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 ) OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 ) diff --git a/src/frontend/translate/translate_arm/parallel.cpp b/src/frontend/translate/translate_arm/parallel.cpp index 50145e7e..292e5811 100644 --- a/src/frontend/translate/translate_arm/parallel.cpp +++ b/src/frontend/translate/translate_arm/parallel.cpp @@ -11,13 +11,25 @@ namespace Arm { // Parallel Add/Subtract (Modulo arithmetic) instructions bool ArmTranslatorVisitor::arm_SADD8(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, n, d, m); - return InterpretThisInstruction(); + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + if (ConditionPassed(cond)) { + auto result = ir.PackedAddS8(ir.GetRegister(n), ir.GetRegister(m)); + ir.SetRegister(d, result.result); + ir.SetGEFlags(result.ge); + } + return true; } bool ArmTranslatorVisitor::arm_SADD16(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, n, d, m); - return InterpretThisInstruction(); + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + if (ConditionPassed(cond)) { + auto result = ir.PackedAddS16(ir.GetRegister(n), ir.GetRegister(m)); + ir.SetRegister(d, result.result); + ir.SetGEFlags(result.ge); + } + return true; } bool ArmTranslatorVisitor::arm_SASX(Cond cond, Reg n, Reg d, Reg m) { @@ -31,13 +43,25 @@ bool ArmTranslatorVisitor::arm_SSAX(Cond cond, Reg n, Reg d, Reg m) { } bool ArmTranslatorVisitor::arm_SSUB8(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, n, d, m); - return InterpretThisInstruction(); + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + if 
(ConditionPassed(cond)) { + auto result = ir.PackedSubS8(ir.GetRegister(n), ir.GetRegister(m)); + ir.SetRegister(d, result.result); + ir.SetGEFlags(result.ge); + } + return true; } bool ArmTranslatorVisitor::arm_SSUB16(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, n, d, m); - return InterpretThisInstruction(); + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + if (ConditionPassed(cond)) { + auto result = ir.PackedSubS16(ir.GetRegister(n), ir.GetRegister(m)); + ir.SetRegister(d, result.result); + ir.SetGEFlags(result.ge); + } + return true; } bool ArmTranslatorVisitor::arm_UADD8(Cond cond, Reg n, Reg d, Reg m) { @@ -52,8 +76,14 @@ bool ArmTranslatorVisitor::arm_UADD8(Cond cond, Reg n, Reg d, Reg m) { } bool ArmTranslatorVisitor::arm_UADD16(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, n, d, m); - return InterpretThisInstruction(); + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + if (ConditionPassed(cond)) { + auto result = ir.PackedAddU16(ir.GetRegister(n), ir.GetRegister(m)); + ir.SetRegister(d, result.result); + ir.SetGEFlags(result.ge); + } + return true; } bool ArmTranslatorVisitor::arm_UASX(Cond cond, Reg n, Reg d, Reg m) { @@ -99,11 +129,16 @@ bool ArmTranslatorVisitor::arm_USUB8(Cond cond, Reg n, Reg d, Reg m) { } bool ArmTranslatorVisitor::arm_USUB16(Cond cond, Reg n, Reg d, Reg m) { - UNUSED(cond, n, d, m); - return InterpretThisInstruction(); + if (d == Reg::PC || n == Reg::PC || m == Reg::PC) + return UnpredictableInstruction(); + if (ConditionPassed(cond)) { + auto result = ir.PackedSubU16(ir.GetRegister(n), ir.GetRegister(m)); + ir.SetRegister(d, result.result); + ir.SetGEFlags(result.ge); + } + return true; } - // Parallel Add/Subtract (Saturating) instructions bool ArmTranslatorVisitor::arm_QADD8(Cond cond, Reg n, Reg d, Reg m) { if (d == Reg::PC || n == Reg::PC || m == Reg::PC) @@ -201,7 +236,6 @@ bool ArmTranslatorVisitor::arm_UQSUB16(Cond cond, Reg n, Reg d, Reg 
m) { return true; } - // Parallel Add/Subtract (Halving) instructions bool ArmTranslatorVisitor::arm_SHADD8(Cond cond, Reg n, Reg d, Reg m) { if (d == Reg::PC || n == Reg::PC || m == Reg::PC) diff --git a/tests/arm/fuzz_arm.cpp b/tests/arm/fuzz_arm.cpp index 819ccb8b..f7dd2645 100644 --- a/tests/arm/fuzz_arm.cpp +++ b/tests/arm/fuzz_arm.cpp @@ -895,27 +895,33 @@ TEST_CASE("Fuzz ARM multiply instructions", "[JitX64]") { } } -TEST_CASE("Fuzz ARM parallel instructions", "[JitX64]") { +TEST_CASE("Fuzz ARM parallel instructions", "[JitX64][parallel]") { const auto is_valid = [](u32 instr) -> bool { // R15 as Rd, Rn, or Rm is UNPREDICTABLE return Bits<0, 3>(instr) != 0b1111 && Bits<12, 15>(instr) != 0b1111 && Bits<16, 19>(instr) != 0b1111; }; - const std::array modulo_instructions = {{ + const std::array modulo_add_instructions = {{ InstructionGenerator("cccc01100001nnnndddd11111001mmmm", is_valid), // SADD8 InstructionGenerator("cccc01100001nnnndddd11110001mmmm", is_valid), // SADD16 - InstructionGenerator("cccc01100001nnnndddd11110011mmmm", is_valid), // SASX - InstructionGenerator("cccc01100001nnnndddd11110101mmmm", is_valid), // SSAX - InstructionGenerator("cccc01100001nnnndddd11111111mmmm", is_valid), // SSUB8 - InstructionGenerator("cccc01100001nnnndddd11110111mmmm", is_valid), // SSUB16 InstructionGenerator("cccc01100101nnnndddd11111001mmmm", is_valid), // UADD8 InstructionGenerator("cccc01100101nnnndddd11110001mmmm", is_valid), // UADD16 - InstructionGenerator("cccc01100101nnnndddd11110011mmmm", is_valid), // UASX - InstructionGenerator("cccc01100101nnnndddd11110101mmmm", is_valid), // USAX + }}; + + const std::array modulo_sub_instructions = {{ + InstructionGenerator("cccc01100001nnnndddd11111111mmmm", is_valid), // SSUB8 + InstructionGenerator("cccc01100001nnnndddd11110111mmmm", is_valid), // SSUB16 InstructionGenerator("cccc01100101nnnndddd11111111mmmm", is_valid), // USUB8 InstructionGenerator("cccc01100101nnnndddd11110111mmmm", is_valid), // USUB16 }}; + 
const std::array modulo_exchange_instructions = {{ + InstructionGenerator("cccc01100001nnnndddd11110011mmmm", is_valid), // SASX + InstructionGenerator("cccc01100001nnnndddd11110101mmmm", is_valid), // SSAX + InstructionGenerator("cccc01100101nnnndddd11110011mmmm", is_valid), // UASX + InstructionGenerator("cccc01100101nnnndddd11110101mmmm", is_valid), // USAX + }}; + const std::array saturating_instructions = {{ InstructionGenerator("cccc01100010nnnndddd11111001mmmm", is_valid), // QADD8 InstructionGenerator("cccc01100010nnnndddd11111111mmmm", is_valid), // QSUB8 @@ -942,9 +948,21 @@ TEST_CASE("Fuzz ARM parallel instructions", "[JitX64]") { InstructionGenerator("cccc01100111nnnndddd11110111mmmm", is_valid), // UHSUB16 }}; - SECTION("Parallel Add/Subtract (Modulo)") { - FuzzJitArm(1, 1, 10000, [&modulo_instructions]() -> u32 { - return modulo_instructions[RandInt(0, modulo_instructions.size() - 1)].Generate(); + SECTION("Parallel Add (Modulo)") { + FuzzJitArm(1, 1, 10000, [&modulo_add_instructions]() -> u32 { + return modulo_add_instructions[RandInt(0, modulo_add_instructions.size() - 1)].Generate(); + }); + } + + SECTION("Parallel Subtract (Modulo)") { + FuzzJitArm(1, 1, 10000, [&modulo_sub_instructions]() -> u32 { + return modulo_sub_instructions[RandInt(0, modulo_sub_instructions.size() - 1)].Generate(); + }); + } + + SECTION("Parallel Exchange (Modulo)") { + FuzzJitArm(1, 1, 10000, [&modulo_exchange_instructions]() -> u32 { + return modulo_exchange_instructions[RandInt(0, modulo_exchange_instructions.size() - 1)].Generate(); }); }