Implement IR instruction PackedSelect, reimplement SEL

This commit is contained in:
MerryMage 2017-11-25 16:33:48 +00:00
parent 18f11972c6
commit d1e0a29cd9
5 changed files with 25 additions and 14 deletions

View file

@ -2128,6 +2128,24 @@ void EmitX64::EmitPackedAbsDiffSumS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst*
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psadbw); EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psadbw);
} }
/// Emits host code for the IR instruction PackedSelect.
///
/// Operands of `inst`, in order:
///   args[0] (ge)   - selector; only its low four bits are used, one bit per byte lane
///                    (bit 0 controls the least significant byte).
///   args[1] (to)   - supplies the bytes for lanes whose selector bit is clear.
///   args[2] (from) - supplies the bytes for lanes whose selector bit is set.
///
/// The result is (from & mask) | (to & ~mask), where mask has 0xFF in every byte lane
/// whose selector bit is set and 0x00 elsewhere.
///
/// Only baseline x86-64 instructions are emitted. The BMI2 `pdep` and BMI1 `andn`
/// instructions are deliberately avoided because they fault on hosts that lack those
/// extensions, and no CPU feature check is available at this point.
void EmitX64::EmitPackedSelect(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
    auto args = reg_alloc.GetArgumentInfo(inst);

    Xbyak::Reg32 ge = reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 to = reg_alloc.UseScratchGpr(args[1]).cvt32();
    Xbyak::Reg32 from = reg_alloc.UseScratchGpr(args[2]).cvt32();

    // Expand 0bXYZW into a per-byte mask 0xXXYYZZWW:
    //   0000 0000 0000 0000 | 0000 0000 0000 xyzw   (ge & 0xF)
    //   0000 000x yzw0 00xy | zw00 0xyz w000 xyzw   (* 0x00204081)
    //   0000 000x 0000 000y | 0000 000z 0000 000w   (& 0x01010101)
    //   xxxx xxxx yyyy yyyy | zzzz zzzz wwww wwww   (* 0xFF)
    // The shifted copies never overlap, so the first multiplication produces no carries.
    code->and_(ge, 0xF);
    code->imul(ge, ge, 0x00204081);
    code->and_(ge, 0x01010101);
    code->imul(ge, ge, 0xFF);

    // from = from & mask
    code->and_(from, ge);
    // to = to & ~mask (ge is a scratch register, so it may be inverted in place)
    code->not_(ge);
    code->and_(to, ge);
    // Merge the two disjoint halves.
    code->or_(from, to);

    reg_alloc.DefineValue(inst, from);
}
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util; using namespace Xbyak::util;
Xbyak::Label end; Xbyak::Label end;

View file

@ -510,6 +510,10 @@ Value IREmitter::PackedAbsDiffSumS8(const Value& a, const Value& b) {
return Inst(Opcode::PackedAbsDiffSumS8, {a, b}); return Inst(Opcode::PackedAbsDiffSumS8, {a, b});
} }
// Builds an Opcode::PackedSelect instruction via Inst() with the operands (ge, a, b),
// in that order, and returns whatever Inst() returns.
// NOTE(review): the x64 emitter for this opcode appears to take result bytes from `b`
// where the matching bit of `ge` is set and from `a` otherwise - confirm against the
// backend before relying on the operand order.
Value IREmitter::PackedSelect(const Value& ge, const Value& a, const Value& b) {
return Inst(Opcode::PackedSelect, {ge, a, b});
}
Value IREmitter::TransferToFP32(const Value& a) { Value IREmitter::TransferToFP32(const Value& a) {
return Inst(Opcode::TransferToFP32, {a}); return Inst(Opcode::TransferToFP32, {a});
} }

View file

@ -174,6 +174,7 @@ public:
Value PackedSaturatedSubU16(const Value& a, const Value& b); Value PackedSaturatedSubU16(const Value& a, const Value& b);
Value PackedSaturatedSubS16(const Value& a, const Value& b); Value PackedSaturatedSubS16(const Value& a, const Value& b);
Value PackedAbsDiffSumS8(const Value& a, const Value& b); Value PackedAbsDiffSumS8(const Value& a, const Value& b);
// Emits an Opcode::PackedSelect instruction with the operands (ge, a, b), in that order.
Value PackedSelect(const Value& ge, const Value& a, const Value& b);
Value TransferToFP32(const Value& a); Value TransferToFP32(const Value& a);
Value TransferToFP64(const Value& a); Value TransferToFP64(const Value& a);

View file

@ -115,6 +115,7 @@ OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 ) OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
// Floating-point operations // Floating-point operations
OPCODE(TransferToFP32, T::F32, T::U32 ) OPCODE(TransferToFP32, T::F32, T::U32 )

View file

@ -23,22 +23,9 @@ bool ArmTranslatorVisitor::arm_SEL(Cond cond, Reg n, Reg d, Reg m) {
return UnpredictableInstruction(); return UnpredictableInstruction();
if (ConditionPassed(cond)) { if (ConditionPassed(cond)) {
auto ge = ir.GetGEFlags();
// Perform some arithmetic to expand 0bXYZW into 0bXXXXXXXXYYYYYYYYZZZZZZZZWWWWWWWW => 0xXXYYZZWW
// The logic behind this is as follows:
// 0000 0000 0000 0000 | 0000 0000 0000 xyzw
// 0000 000x yzw0 00xy | zw00 0xyz w000 xyzw (x * 0x00204081)
// 0000 000x 0000 000y | 0000 000z 0000 000w (x & 0x01010101)
// xxxx xxxx yyyy yyyy | zzzz zzzz wwww wwww (x * 0xff)
auto x2 = ir.Mul(ge, ir.Imm32(0x00204081));
auto x3 = ir.And(x2, ir.Imm32(0x01010101));
auto mask = ir.Mul(x3, ir.Imm32(0xFF));
auto to = ir.GetRegister(m); auto to = ir.GetRegister(m);
auto from = ir.GetRegister(n); auto from = ir.GetRegister(n);
auto result = ir.Or(ir.And(from, mask), ir.And(to, ir.Not(mask))); auto result = ir.PackedSelect(ir.GetGEFlags(), to, from);
ir.SetRegister(d, result); ir.SetRegister(d, result);
} }