IR: Generalise SignedSaturated{Add,Sub} to support more bitwidths

This commit is contained in:
MerryMage 2018-07-30 10:59:52 +01:00
parent 71db0e67ae
commit 10e196480f
4 changed files with 119 additions and 43 deletions

View file

@@ -4,11 +4,14 @@
* General Public License version 2 or any later version.
*/
#include <limits>
#include "backend_x64/block_of_code.h"
#include "backend_x64/emit_x64.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/mp/integer.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/opcodes.h"
@@ -16,22 +19,53 @@
namespace Dynarmic::BackendX64 {
using namespace Xbyak::util;
namespace mp = Dynarmic::Common::mp;
void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
namespace {
enum class Op {
Add,
Sub,
};
template<Op op, size_t size>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]);
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]);
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
code.mov(overflow, result);
code.shr(overflow, 31);
code.add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
result.setBit(size);
addend.setBit(size);
overflow.setBit(size);
constexpr u64 int_max = static_cast<u64>(std::numeric_limits<mp::signed_integer_of_size<size>>::max());
if constexpr (size < 64) {
code.xor_(overflow.cvt32(), overflow.cvt32());
code.bt(result.cvt32(), size - 1);
code.adc(overflow.cvt32(), int_max);
} else {
code.mov(overflow, int_max);
code.bt(result, 63);
code.adc(overflow, 0);
}
// overflow now contains 0x7F... if a was positive, or 0x80... if a was negative
if constexpr (op == Op::Add) {
code.add(result, addend);
} else {
code.sub(result, addend);
}
if constexpr (size < 64) {
code.cmovo(result.cvt32(), overflow.cvt32());
} else {
code.cmovo(result, overflow);
}
if (overflow_inst) {
code.seto(overflow.cvt8());
@@ -43,30 +77,38 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
} // anonymous namespace
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
code.mov(overflow, result);
code.shr(overflow, 31);
code.add(overflow, 0x7FFFFFFF);
// overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
code.sub(result, subend);
code.cmovo(result, overflow);
if (overflow_inst) {
code.seto(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
// 8-bit signed saturating add: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
ctx.reg_alloc.DefineValue(inst, result);
// 16-bit signed saturating add: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
// 32-bit signed saturating add: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
// 64-bit signed saturating add: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
// 8-bit signed saturating subtract: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
// 16-bit signed saturating subtract: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
// 32-bit signed saturating subtract: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
// 64-bit signed saturating subtract: delegate to the shared width-templated emitter.
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {

View file

@@ -481,15 +481,43 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) {
return Inst<U64>(Opcode::MinUnsigned64, a, b);
}
ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
auto result = Inst<U32>(Opcode::SignedSaturatedAdd, a, b);
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
// Emits a signed saturating addition of two equally-sized values, picking the
// width-specific opcode (8/16/32/64) from the operand type.
// @param a First operand; must have the same IR type as b (asserted below).
// @param b Second operand.
// @return The saturated result together with the overflow (saturation) flag
//         derived via the GetOverflowFromOp pseudo-operation.
ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());
    const auto result = [&]() -> IR::UAny {
        switch (a.GetType()) {
        case IR::Type::U8:
            return Inst<U8>(Opcode::SignedSaturatedAdd8, a, b);
        case IR::Type::U16:
            return Inst<U16>(Opcode::SignedSaturatedAdd16, a, b);
        case IR::Type::U32:
            return Inst<U32>(Opcode::SignedSaturatedAdd32, a, b);
        case IR::Type::U64:
            return Inst<U64>(Opcode::SignedSaturatedAdd64, a, b);
        default:
            // Previously an unsupported type silently produced an empty value
            // that was then fed into GetOverflowFromOp; fail loudly instead.
            ASSERT(false);
            return IR::UAny{};
        }
    }();
    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
    return {result, overflow};
}
ResultAndOverflow<U32> IREmitter::SignedSaturatedSub(const U32& a, const U32& b) {
auto result = Inst<U32>(Opcode::SignedSaturatedSub, a, b);
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
// Emits a signed saturating subtraction of two equally-sized values, picking
// the width-specific opcode (8/16/32/64) from the operand type.
// @param a Minuend; must have the same IR type as b (asserted below).
// @param b Subtrahend.
// @return The saturated result together with the overflow (saturation) flag
//         derived via the GetOverflowFromOp pseudo-operation.
ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());
    const auto result = [&]() -> IR::UAny {
        switch (a.GetType()) {
        case IR::Type::U8:
            return Inst<U8>(Opcode::SignedSaturatedSub8, a, b);
        case IR::Type::U16:
            return Inst<U16>(Opcode::SignedSaturatedSub16, a, b);
        case IR::Type::U32:
            return Inst<U32>(Opcode::SignedSaturatedSub32, a, b);
        case IR::Type::U64:
            return Inst<U64>(Opcode::SignedSaturatedSub64, a, b);
        default:
            // Previously an unsupported type silently produced an empty value
            // that was then fed into GetOverflowFromOp; fail loudly instead.
            ASSERT(false);
            return IR::UAny{};
        }
    }();
    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
    return {result, overflow};
}

View file

@@ -142,8 +142,8 @@ public:
U32U64 MinSigned(const U32U64& a, const U32U64& b);
U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);

View file

@@ -77,9 +77,9 @@ A64OPC(GetTPIDRRO, T::U64,
OPCODE(PushRSB, T::Void, T::U64 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, T::U1, T::U32 )
OPCODE(GetOverflowFromOp, T::U1, T::U32 )
OPCODE(GetGEFromOp, T::U32, T::U32 )
OPCODE(GetCarryFromOp, T::U1, T::Opaque )
OPCODE(GetOverflowFromOp, T::U1, T::Opaque )
OPCODE(GetGEFromOp, T::U32, T::Opaque )
OPCODE(GetNZCVFromOp, T::NZCVFlags, T::Opaque )
OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32 )
@@ -155,10 +155,16 @@ OPCODE(MinUnsigned32, T::U32, T::U32,
OPCODE(MinUnsigned64, T::U64, T::U64, T::U64 )
// Saturated instructions
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(SignedSaturatedAdd8, T::U8, T::U8, T::U8 )
OPCODE(SignedSaturatedAdd16, T::U16, T::U16, T::U16 )
OPCODE(SignedSaturatedAdd32, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedAdd64, T::U64, T::U64, T::U64 )
OPCODE(SignedSaturatedSub8, T::U8, T::U8, T::U8 )
OPCODE(SignedSaturatedSub16, T::U16, T::U16, T::U16 )
OPCODE(SignedSaturatedSub32, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub64, T::U64, T::U64, T::U64 )
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
// Packed instructions
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )