From d1e0a29cd92043f71bd306df2df800abbef4af7c Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sat, 25 Nov 2017 16:33:48 +0000
Subject: [PATCH] Implement IR instruction PackedSelect, reimplement SEL

---
 src/backend_x64/emit_x64.cpp                  | 31 +++++++++++++++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp                |  4 ++++
 src/frontend/ir/ir_emitter.h                  |  1 +
 src/frontend/ir/opcodes.inc                   |  1 +
 src/frontend/translate/translate_arm/misc.cpp | 15 +--------------
 5 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 41ed3291..9f98801d 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -2128,6 +2128,37 @@ void EmitX64::EmitPackedAbsDiffSumS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* 
     EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psadbw);
 }
 
+void EmitX64::EmitPackedSelect(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+    // Bytewise select driven by the four GE flags held in the low bits of args[0]:
+    // each result byte is taken from args[2] where the matching GE bit is set and
+    // from args[1] where it is clear.
+    auto args = reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Reg32 ge = reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 to = reg_alloc.UseScratchGpr(args[1]).cvt32();
+    Xbyak::Reg32 from = reg_alloc.UseScratchGpr(args[2]).cvt32();
+
+    // Expand 0bXYZW into the byte mask 0xXXYYZZWW using baseline x64 instructions
+    // only. PDEP (BMI2) and ANDN (BMI1) are avoided on purpose: they raise #UD on
+    // processors that lack those extensions.
+    //   0000 0000 0000 0000 | 0000 0000 0000 xyzw   (ge & 0xF)
+    //   0000 000x yzw0 00xy | zw00 0xyz w000 xyzw   (ge * 0x00204081)
+    //   0000 000x 0000 000y | 0000 000z 0000 000w   (ge & 0x01010101)
+    //   xxxx xxxx yyyy yyyy | zzzz zzzz wwww wwww   (ge * 0xFF)
+    code->and_(ge, 0xF);
+    code->imul(ge, ge, 0x00204081);
+    code->and_(ge, 0x01010101);
+    code->imul(ge, ge, 0xFF);
+
+    // from = (from & mask) | (to & ~mask)
+    code->and_(from, ge);
+    code->not_(ge);
+    code->and_(to, ge);
+    code->or_(from, to);
+
+    reg_alloc.DefineValue(inst, from);
+}
+
 static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     using namespace Xbyak::util;
     Xbyak::Label end;
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index cd0c8c86..5c8ad05e 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -510,6 +510,10 @@ Value IREmitter::PackedAbsDiffSumS8(const Value& a, const Value& b) {
     return Inst(Opcode::PackedAbsDiffSumS8, {a, b});
 }
 
+Value IREmitter::PackedSelect(const Value& ge, const Value& a, const Value& b) {
+    return Inst(Opcode::PackedSelect, {ge, a, b});
+}
+
 Value IREmitter::TransferToFP32(const Value& a) {
     return Inst(Opcode::TransferToFP32, {a});
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 74c1f784..a6391bd3 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -174,6 +174,7 @@ public:
     Value PackedSaturatedSubU16(const Value& a, const Value& b);
     Value PackedSaturatedSubS16(const Value& a, const Value& b);
     Value PackedAbsDiffSumS8(const Value& a, const Value& b);
+    Value PackedSelect(const Value& ge, const Value& a, const Value& b);
 
     Value TransferToFP32(const Value& a);
     Value TransferToFP64(const Value& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index edf8cb6a..dc1b594d 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -115,6 +115,7 @@ OPCODE(PackedSaturatedAddS16,   T::U32,         T::U32,         T::U32
 OPCODE(PackedSaturatedSubU16,   T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedSaturatedSubS16,   T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedAbsDiffSumS8,      T::U32,         T::U32,         T::U32                          )
+OPCODE(PackedSelect,            T::U32,         T::U32,         T::U32,         T::U32          )
 
 // Floating-point operations
 OPCODE(TransferToFP32,          T::F32,         T::U32                                          )
diff --git a/src/frontend/translate/translate_arm/misc.cpp b/src/frontend/translate/translate_arm/misc.cpp
index e6e806a0..700565ee 100644
--- a/src/frontend/translate/translate_arm/misc.cpp
+++ b/src/frontend/translate/translate_arm/misc.cpp
@@ -23,22 +23,9 @@ bool ArmTranslatorVisitor::arm_SEL(Cond cond, Reg n, Reg d, Reg m) {
         return UnpredictableInstruction();
 
     if (ConditionPassed(cond)) {
-        auto ge = ir.GetGEFlags();
-
-        // Perform some arithmetic to expand 0bXYZW into 0bXXXXXXXXYYYYYYYYZZZZZZZZWWWWWWWW => 0xXXYYZZWW
-        // The logic behind this is as follows:
-        // 0000 0000 0000 0000 | 0000 0000 0000 xyzw
-        // 0000 000x yzw0 00xy | zw00 0xyz w000 xyzw (x * 0x00204081)
-        // 0000 000x 0000 000y | 0000 000z 0000 000w (x & 0x01010101)
-        // xxxx xxxx yyyy yyyy | zzzz zzzz wwww wwww (x * 0xff)
-
-        auto x2 = ir.Mul(ge, ir.Imm32(0x00204081));
-        auto x3 = ir.And(x2, ir.Imm32(0x01010101));
-        auto mask = ir.Mul(x3, ir.Imm32(0xFF));
-
         auto to = ir.GetRegister(m);
         auto from = ir.GetRegister(n);
-        auto result = ir.Or(ir.And(from, mask), ir.And(to, ir.Not(mask)));
+        auto result = ir.PackedSelect(ir.GetGEFlags(), to, from);
         ir.SetRegister(d, result);
     }