Implement SHADD8 and SHADD16 (#47)

This commit is contained in:
Merry 2016-11-26 18:12:29 +00:00 committed by GitHub
parent 11ae8d1ffa
commit cb17f9a3ed
5 changed files with 83 additions and 3 deletions

View file

@ -1341,6 +1341,62 @@ void EmitX64::EmitPackedHalvingAddU16(IR::Block& block, IR::Inst* inst) {
code->add(result, xor_a_b);
}
void EmitX64::EmitPackedHalvingAddS8(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 and_a_b = reg_a;
Xbyak::Reg32 result = reg_a;
Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
// We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
// carry propagates the sign bit from (x^y)>>1 upwards by one.
code->mov(xor_a_b, reg_a);
code->and(and_a_b, reg_b);
code->xor(xor_a_b, reg_b);
code->mov(carry, xor_a_b);
code->and(carry, 0x80808080);
code->shr(xor_a_b, 1);
code->and(xor_a_b, 0x7F7F7F7F);
code->add(result, xor_a_b);
code->xor(result, carry);
}
void EmitX64::EmitPackedHalvingAddS16(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 and_a_b = reg_a;
Xbyak::Reg32 result = reg_a;
Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
// We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.
// carry propagates the sign bit from (x^y)>>1 upwards by one.
code->mov(xor_a_b, reg_a);
code->and(and_a_b, reg_b);
code->xor(xor_a_b, reg_b);
code->mov(carry, xor_a_b);
code->and(carry, 0x80008000);
code->shr(xor_a_b, 1);
code->and(xor_a_b, 0x7FFF7FFF);
code->add(result, xor_a_b);
code->xor(result, carry);
}
void EmitX64::EmitPackedSaturatedAddU8(IR::Block& block, IR::Inst* inst) {
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
}

View file

@ -321,13 +321,21 @@ Value IREmitter::ByteReverseDual(const Value& a) {
}
Value IREmitter::PackedHalvingAddU8(const Value& a, const Value& b) {
return Inst(Opcode::PackedHalvingAddU8, { a, b });
return Inst(Opcode::PackedHalvingAddU8, {a, b});
}
Value IREmitter::PackedHalvingAddS8(const Value& a, const Value& b) {
return Inst(Opcode::PackedHalvingAddS8, {a, b});
}
Value IREmitter::PackedHalvingAddU16(const Value& a, const Value& b) {
return Inst(Opcode::PackedHalvingAddU16, {a, b});
}
Value IREmitter::PackedHalvingAddS16(const Value& a, const Value& b) {
return Inst(Opcode::PackedHalvingAddS16, {a, b});
}
Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
return Inst(Opcode::PackedSaturatedAddU8, {a, b});
}

View file

@ -122,7 +122,9 @@ public:
Value ByteReverseHalf(const Value& a);
Value ByteReverseDual(const Value& a);
Value PackedHalvingAddU8(const Value& a, const Value& b);
Value PackedHalvingAddS8(const Value& a, const Value& b);
Value PackedHalvingAddU16(const Value& a, const Value& b);
Value PackedHalvingAddS16(const Value& a, const Value& b);
Value PackedSaturatedAddU8(const Value& a, const Value& b);
Value PackedSaturatedAddS8(const Value& a, const Value& b);
Value PackedSaturatedSubU8(const Value& a, const Value& b);

View file

@ -72,7 +72,9 @@ OPCODE(ByteReverseWord, T::U32, T::U32
OPCODE(ByteReverseHalf, T::U16, T::U16 )
OPCODE(ByteReverseDual, T::U64, T::U64 )
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )

View file

@ -155,11 +155,23 @@ bool ArmTranslatorVisitor::arm_UQSUB16(Cond cond, Reg n, Reg d, Reg m) {
// Parallel Add/Subtract (Halving) instructions
bool ArmTranslatorVisitor::arm_SHADD8(Cond cond, Reg n, Reg d, Reg m) {
return InterpretThisInstruction();
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
return UnpredictableInstruction();
if (ConditionPassed(cond)) {
auto result = ir.PackedHalvingAddS8(ir.GetRegister(n), ir.GetRegister(m));
ir.SetRegister(d, result);
}
return true;
}
bool ArmTranslatorVisitor::arm_SHADD16(Cond cond, Reg n, Reg d, Reg m) {
return InterpretThisInstruction();
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
return UnpredictableInstruction();
if (ConditionPassed(cond)) {
auto result = ir.PackedHalvingAddS16(ir.GetRegister(n), ir.GetRegister(m));
ir.SetRegister(d, result);
}
return true;
}
bool ArmTranslatorVisitor::arm_SHASX(Cond cond, Reg n, Reg d, Reg m) {