Implement SHSUB8 and SHSUB16 (#74)

* Implement IR operations PackedHalvingSubS8 and PackedHalvingSubS16
This commit is contained in:
Fernando Sahmkow 2016-12-22 07:02:24 -05:00 committed by Merry
parent 967f3cf7e1
commit 677f62dd6f
5 changed files with 95 additions and 5 deletions

View file

@ -1928,6 +1928,41 @@ void EmitX64::EmitPackedHalvingSubU8(IR::Block&, IR::Inst* inst) {
// minuend now contains the desired result.
}
// Emits host code for the PackedHalvingSubS8 IR instruction: a partitioned
// (x - y) / 2 on the four signed 8-bit lanes packed in the 32-bit operands.
void EmitX64::EmitPackedHalvingSubS8(IR::Block&, IR::Inst* inst) {
    // Top bit of every 8-bit lane.
    constexpr unsigned int lane_msb = 0x80808080;

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 borrow = reg_alloc.UseScratchGpr(rhs).cvt32();
    Xbyak::Reg32 sign = reg_alloc.ScratchGpr().cvt32();

    // Identity used: x - y == (x ^ y) - (((x ^ y) & y) << 1).
    // Halving both sides gives (x - y) / 2 == ((x ^ y) >> 1) - ((x ^ y) & y),
    // and x ^ y already holds the low bit of the full difference.

    code->xor(result, borrow);
    code->and(borrow, result);
    code->mov(sign, result);
    code->and(sign, lane_msb);
    code->shr(result, 1);

    // Register contents now:
    //   result := (a ^ b) >> 1
    //   borrow := (a ^ b) & b
    //   sign   := (a ^ b) & 0x80808080

    // Partitioned subtraction. After the shift each lane of `result` only carries
    // 7 meaningful bits, so the lane's top bit is free to act as a bit to borrow from:
    // it is forced to 1 before subtracting, then flipped afterwards so that it
    // records whether a borrow out of the lower 7 bits occurred.
    // Finally the saved top bit of (a ^ b) is folded in to sign extend each lane.
    code->or(result, lane_msb);
    code->sub(result, borrow);
    code->xor(result, lane_msb);
    code->xor(result, sign);
}
void EmitX64::EmitPackedHalvingSubU16(IR::Block&, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
@ -1954,8 +1989,41 @@ void EmitX64::EmitPackedHalvingSubU16(IR::Block&, IR::Inst* inst) {
code->or(minuend, 0x80008000);
code->sub(minuend, subtrahend);
code->xor(minuend, 0x80008000);
}
// minuend now contains the desired result.
// Emits host code for the PackedHalvingSubS16 IR instruction: a partitioned
// (x - y) / 2 on the two signed 16-bit lanes packed in the 32-bit operands.
void EmitX64::EmitPackedHalvingSubS16(IR::Block&, IR::Inst* inst) {
    // Top bit of every 16-bit lane.
    constexpr unsigned int lane_msb = 0x80008000;

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 borrow = reg_alloc.UseScratchGpr(rhs).cvt32();
    Xbyak::Reg32 sign = reg_alloc.ScratchGpr().cvt32();

    // Identity used: x - y == (x ^ y) - (((x ^ y) & y) << 1).
    // Halving both sides gives (x - y) / 2 == ((x ^ y) >> 1) - ((x ^ y) & y),
    // and x ^ y already holds the low bit of the full difference.

    code->xor(result, borrow);
    code->and(borrow, result);
    code->mov(sign, result);
    code->and(sign, lane_msb);
    code->shr(result, 1);

    // Register contents now:
    //   result := (a ^ b) >> 1
    //   borrow := (a ^ b) & b
    //   sign   := (a ^ b) & 0x80008000

    // Partitioned subtraction. After the shift each lane of `result` only carries
    // 15 meaningful bits, so the lane's top bit is free to act as a bit to borrow from:
    // it is forced to 1 before subtracting, then flipped afterwards so that it
    // records whether a borrow out of the lower 15 bits occurred.
    // Finally the saved top bit of (a ^ b) is folded in to sign extend each lane.
    code->or(result, lane_msb);
    code->sub(result, borrow);
    code->xor(result, lane_msb);
    code->xor(result, sign);
}
static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {

View file

@ -422,6 +422,10 @@ Value IREmitter::PackedHalvingSubU8(const Value& a, const Value& b) {
return Inst(Opcode::PackedHalvingSubU8, {a, b});
}
// Creates a PackedHalvingSubS8 instruction with operands `a` and `b` through Inst()
// and returns the Value that Inst() hands back.
Value IREmitter::PackedHalvingSubS8(const Value& a, const Value& b) {
    const Opcode op = Opcode::PackedHalvingSubS8;
    return Inst(op, {a, b});
}
// Creates a PackedHalvingAddU16 instruction with operands `a` and `b` through Inst()
// and returns the Value that Inst() hands back.
Value IREmitter::PackedHalvingAddU16(const Value& a, const Value& b) {
    const Opcode op = Opcode::PackedHalvingAddU16;
    return Inst(op, {a, b});
}
@ -434,6 +438,10 @@ Value IREmitter::PackedHalvingSubU16(const Value& a, const Value& b) {
return Inst(Opcode::PackedHalvingSubU16, {a, b});
}
// Creates a PackedHalvingSubS16 instruction with operands `a` and `b` through Inst()
// and returns the Value that Inst() hands back.
Value IREmitter::PackedHalvingSubS16(const Value& a, const Value& b) {
    const Opcode op = Opcode::PackedHalvingSubS16;
    return Inst(op, {a, b});
}
// Creates a PackedSaturatedAddU8 instruction with operands `a` and `b` through Inst()
// and returns the Value that Inst() hands back.
Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
    const Opcode op = Opcode::PackedSaturatedAddU8;
    return Inst(op, {a, b});
}

View file

@ -152,9 +152,11 @@ public:
Value PackedHalvingAddU8(const Value& a, const Value& b);
Value PackedHalvingAddS8(const Value& a, const Value& b);
Value PackedHalvingSubU8(const Value& a, const Value& b);
Value PackedHalvingSubS8(const Value& a, const Value& b);
Value PackedHalvingAddU16(const Value& a, const Value& b);
Value PackedHalvingAddS16(const Value& a, const Value& b);
Value PackedHalvingSubU16(const Value& a, const Value& b);
Value PackedHalvingSubS16(const Value& a, const Value& b);
Value PackedSaturatedAddU8(const Value& a, const Value& b);
Value PackedSaturatedAddS8(const Value& a, const Value& b);
Value PackedSaturatedSubU8(const Value& a, const Value& b);

View file

@ -94,9 +94,11 @@ OPCODE(PackedSubS16, T::U32, T::U32, T::U32
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )

View file

@ -268,13 +268,23 @@ bool ArmTranslatorVisitor::arm_SHSAX(Cond cond, Reg n, Reg d, Reg m) {
}
// SHSUB8<c> <Rd>, <Rn>, <Rm>
// Translates SHSUB8 into IR: when the condition passes, Rd receives the result of
// PackedHalvingSubS8 applied to Rn and Rm.
// The stale interpreter fallback (UNUSED(...) + return InterpretThisInstruction())
// that used to open this function has been dropped; it returned before the
// translation below could ever run.
bool ArmTranslatorVisitor::arm_SHSUB8(Cond cond, Reg n, Reg d, Reg m) {
    // Using PC as destination or as either operand is reported as unpredictable.
    if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
        return UnpredictableInstruction();

    if (ConditionPassed(cond)) {
        auto result = ir.PackedHalvingSubS8(ir.GetRegister(n), ir.GetRegister(m));
        ir.SetRegister(d, result);
    }
    return true;
}
// SHSUB16<c> <Rd>, <Rn>, <Rm>
// Translates SHSUB16 into IR: when the condition passes, Rd receives the result of
// PackedHalvingSubS16 applied to Rn and Rm.
// The stale interpreter fallback (UNUSED(...) + return InterpretThisInstruction())
// that used to open this function has been dropped; it returned before the
// translation below could ever run.
bool ArmTranslatorVisitor::arm_SHSUB16(Cond cond, Reg n, Reg d, Reg m) {
    // Using PC as destination or as either operand is reported as unpredictable.
    if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
        return UnpredictableInstruction();

    if (ConditionPassed(cond)) {
        auto result = ir.PackedHalvingSubS16(ir.GetRegister(n), ir.GetRegister(m));
        ir.SetRegister(d, result);
    }
    return true;
}
bool ArmTranslatorVisitor::arm_UHADD8(Cond cond, Reg n, Reg d, Reg m) {