Implement UHSUB8 and UHSUB16 (#48)

2016-11-26 18:27:21 +00:00 · 2016-11-26 18:27:21 +00:00 · 0ff8c375af
commit 0ff8c375af
parent cb17f9a3ed
5 changed files with 86 additions and 2 deletions
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@ -1397,6 +1397,66 @@ void EmitX64::EmitPackedHalvingAddS16(IR::Block& block, IR::Inst* inst) {
    code->xor(result, carry);
 }

+void EmitX64::EmitPackedHalvingSubU8(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0); // minuend operand
+    IR::Value b = inst->GetArg(1); // subtrahend operand
+
+    Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
+    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();
+
+    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
+    // Note that the LSB of x^y is always the LSB of x-y.
+    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
+
+    code->xor(minuend, subtrahend);
+    code->and(subtrahend, minuend);
+    code->shr(minuend, 1);
+
+    // At this point (viewing each register as one 32-bit value),
+    // minuend := (a^b) >> 1
+    // subtrahend := (a^b) & b
+
+    // We must now perform a partitioned (per-byte) subtraction.
+    // We can do this because each byte of minuend only needs to hold a 7-bit field.
+    // We use the top bit of each byte as a bit to borrow from; the OR sets it, overwriting whatever the shift moved in.
+    // We invert this bit at the end: it is then set exactly when the field had to borrow, i.e. it is the result's top bit.
+    code->or(minuend, 0x80808080);
+    code->sub(minuend, subtrahend);
+    code->xor(minuend, 0x80808080);
+
+    // minuend now contains the desired result.
+}
+
+void EmitX64::EmitPackedHalvingSubU16(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0); // minuend operand
+    IR::Value b = inst->GetArg(1); // subtrahend operand
+
+    Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
+    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();
+
+    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
+    // Note that the LSB of x^y is always the LSB of x-y.
+    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
+
+    code->xor(minuend, subtrahend);
+    code->and(subtrahend, minuend);
+    code->shr(minuend, 1);
+
+    // At this point (viewing each register as one 32-bit value),
+    // minuend := (a^b) >> 1
+    // subtrahend := (a^b) & b
+
+    // We must now perform a partitioned (per-halfword) subtraction.
+    // We can do this because each halfword of minuend only needs to hold a 15-bit field.
+    // We use the top bit of each halfword as a bit to borrow from; the OR sets it, overwriting whatever the shift moved in.
+    // We invert this bit at the end: it is then set exactly when the field had to borrow, i.e. it is the result's top bit.
+    code->or(minuend, 0x80008000);
+    code->sub(minuend, subtrahend);
+    code->xor(minuend, 0x80008000);
+
+    // minuend now contains the desired result.
+}
+
 void EmitX64::EmitPackedSaturatedAddU8(IR::Block& block, IR::Inst* inst) {
    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
 }
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -328,6 +328,10 @@ Value IREmitter::PackedHalvingAddS8(const Value& a, const Value& b) {
    return Inst(Opcode::PackedHalvingAddS8, {a, b});
 }

+Value IREmitter::PackedHalvingSubU8(const Value& a, const Value& b) {
+    return Inst(Opcode::PackedHalvingSubU8, {a, b}); // operand order matters: a is the minuend, b the subtrahend
+}
+
 Value IREmitter::PackedHalvingAddU16(const Value& a, const Value& b) {
    return Inst(Opcode::PackedHalvingAddU16, {a, b});
 }
@ -336,6 +340,10 @@ Value IREmitter::PackedHalvingAddS16(const Value& a, const Value& b) {
    return Inst(Opcode::PackedHalvingAddS16, {a, b});
 }

+Value IREmitter::PackedHalvingSubU16(const Value& a, const Value& b) {
+    return Inst(Opcode::PackedHalvingSubU16, {a, b}); // operand order matters: a is the minuend, b the subtrahend
+}
+
 Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
    return Inst(Opcode::PackedSaturatedAddU8, {a, b});
 }
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -123,8 +123,10 @@ public:
    Value ByteReverseDual(const Value& a);
    Value PackedHalvingAddU8(const Value& a, const Value& b);
    Value PackedHalvingAddS8(const Value& a, const Value& b);
+    Value PackedHalvingSubU8(const Value& a, const Value& b);
    Value PackedHalvingAddU16(const Value& a, const Value& b);
    Value PackedHalvingAddS16(const Value& a, const Value& b);
+    Value PackedHalvingSubU16(const Value& a, const Value& b);
    Value PackedSaturatedAddU8(const Value& a, const Value& b);
    Value PackedSaturatedAddS8(const Value& a, const Value& b);
    Value PackedSaturatedSubU8(const Value& a, const Value& b);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -73,8 +73,10 @@ OPCODE(ByteReverseHalf,         T::U16,         T::U16
 OPCODE(ByteReverseDual,         T::U64,         T::U64                                          )
 OPCODE(PackedHalvingAddU8,      T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedHalvingAddS8,      T::U32,         T::U32,         T::U32                          )
+OPCODE(PackedHalvingSubU8,      T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedHalvingAddU16,     T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedHalvingAddS16,     T::U32,         T::U32,         T::U32                          )
+OPCODE(PackedHalvingSubU16,     T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedSaturatedAddU8,    T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedSaturatedAddS8,    T::U32,         T::U32,         T::U32                          )
 OPCODE(PackedSaturatedSubU8,    T::U32,         T::U32,         T::U32                          )
--- a/src/frontend/translate/translate_arm/parallel.cpp
+++ b/src/frontend/translate/translate_arm/parallel.cpp
@ -219,11 +219,23 @@ bool ArmTranslatorVisitor::arm_UHSAX(Cond cond, Reg n, Reg d, Reg m) {
 }

 bool ArmTranslatorVisitor::arm_UHSUB8(Cond cond, Reg n, Reg d, Reg m) {
-    return InterpretThisInstruction();
+    if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
+        return UnpredictableInstruction(); // none of Rd, Rn, Rm may be the PC
+    if (ConditionPassed(cond)) {
+        auto result = ir.PackedHalvingSubU8(ir.GetRegister(n), ir.GetRegister(m)); // Rn is the minuend, Rm the subtrahend
+        ir.SetRegister(d, result);
+    }
+    return true;
 }

 bool ArmTranslatorVisitor::arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m) {
-    return InterpretThisInstruction();
+    if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
+        return UnpredictableInstruction(); // none of Rd, Rn, Rm may be the PC
+    if (ConditionPassed(cond)) {
+        auto result = ir.PackedHalvingSubU16(ir.GetRegister(n), ir.GetRegister(m)); // Rn is the minuend, Rm the subtrahend
+        ir.SetRegister(d, result);
+    }
+    return true;
 }

 } // namespace Arm