IR: Generalise SignedSaturated{Add,Sub} to support more bitwidths

2018-07-30 10:59:52 +01:00 · 2018-07-30 10:59:52 +01:00 · 10e196480f
commit 10e196480f
parent 71db0e67ae
4 changed files with 119 additions and 43 deletions
--- a/src/backend_x64/emit_x64_saturation.cpp
+++ b/src/backend_x64/emit_x64_saturation.cpp
@ -4,11 +4,14 @@
 * General Public License version 2 or any later version.
 */
 #include <limits>
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/mp/integer.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@ -16,22 +19,53 @@
 namespace Dynarmic::BackendX64 {
 using namespace Xbyak::util;
 namespace mp = Dynarmic::Common::mp;
-void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
+namespace {
 // Selects which arithmetic instruction EmitSignedSaturatedOp emits:
 // Op::Add emits `add`, Op::Sub emits `sub`.
 enum class Op {
    Add,
    Sub,
 };
 // Emits x64 code for a signed saturating add or subtract of two `size`-bit operands.
 //   op   - Op::Add emits `add`, Op::Sub emits `sub`.
 //   size - operand width in bits; the three registers are narrowed to it via setBit().
 // NOTE(review): a hunk boundary falls inside this function, so part of the
 // overflow_inst handling is not visible in this diff.
 template<Op op, size_t size>
 void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    // Associated GetOverflowFromOp pseudo-operation; null-checked further down.
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]);
-    Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+    Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]);
-    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
-    code.mov(overflow, result);
+    result.setBit(size);
-    code.shr(overflow, 31);
+    addend.setBit(size);
-    code.add(overflow, 0x7FFFFFFF);
+    overflow.setBit(size);
-    // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
+
    // Largest value of the signed integer type that is `size` bits wide.
    constexpr u64 int_max = static_cast<u64>(std::numeric_limits<mp::signed_integer_of_size<size>>::max());
    // Build the saturation value from the sign bit of the first operand:
    // bt copies that bit into the carry flag and adc adds the carry to int_max.
    // NOTE(review): the 64-bit path loads int_max with mov first, presumably because
    // it does not fit an adc immediate at that width - confirm.
    if constexpr (size < 64) {
        code.xor_(overflow.cvt32(), overflow.cvt32());
        code.bt(result.cvt32(), size - 1);
        code.adc(overflow.cvt32(), int_max);
    } else {
        code.mov(overflow, int_max);
        code.bt(result, 63);
        code.adc(overflow, 0);
    }
    // overflow now contains 0x7F... if a was positive, or 0x80... if a was negative
    if constexpr (op == Op::Add) {
        code.add(result, addend);
    } else {
        code.sub(result, addend);
    }
    // If the add/sub set the overflow flag, replace the result with the saturation value.
    if constexpr (size < 64) {
        code.cmovo(result.cvt32(), overflow.cvt32());
    } else {
        code.cmovo(result, overflow);
    }
    if (overflow_inst) {
        // Capture the overflow flag as 0/1 in the low byte of the overflow register.
        code.seto(overflow.cvt8());
@ -43,30 +77,38 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.DefineValue(inst, result);
 }
-void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
+} // anonymous namespace
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 // 8-bit signed saturating add: the `+` lines forward to EmitSignedSaturatedOp<Op::Add, 8>.
 // NOTE(review): the unmarked lines that follow look like the body of the removed
 // 32-bit EmitSignedSaturatedSub whose `-` markers were lost in this rendering;
 // confirm against the commit before treating them as part of this function.
+void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
-
+    EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
    code.mov(overflow, result);
    code.shr(overflow, 31);
    code.add(overflow, 0x7FFFFFFF);
    // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
    code.sub(result, subend);
    code.cmovo(result, overflow);
    if (overflow_inst) {
        code.seto(overflow.cvt8());
        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
        ctx.EraseInstruction(overflow_inst);
 }
-    ctx.reg_alloc.DefineValue(inst, result);
 // 16-bit signed saturating add: forwards to EmitSignedSaturatedOp<Op::Add, 16>.
+void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
 }
 // 32-bit signed saturating add: forwards to EmitSignedSaturatedOp<Op::Add, 32>.
 void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
 }
 // 64-bit signed saturating add: forwards to EmitSignedSaturatedOp<Op::Add, 64>.
 void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
 }
 // 8-bit signed saturating subtract: forwards to EmitSignedSaturatedOp<Op::Sub, 8>.
 void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
 }
 // 16-bit signed saturating subtract: forwards to EmitSignedSaturatedOp<Op::Sub, 16>.
 void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
 }
 // 32-bit signed saturating subtract: forwards to EmitSignedSaturatedOp<Op::Sub, 32>.
 void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
 }
 // 64-bit signed saturating subtract: forwards to EmitSignedSaturatedOp<Op::Sub, 64>.
 void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
 }
 void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -481,15 +481,43 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) {
    return Inst<U64>(Opcode::MinUnsigned64, a, b);
 }
 // Emits a signed saturating add for two operands of the same type (U8, U16, U32 or U64).
 // Returns the result together with a GetOverflowFromOp pseudo-operation built on it.
-ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
+ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
-    auto result = Inst<U32>(Opcode::SignedSaturatedAdd, a, b);
+    ASSERT(a.GetType() == b.GetType());
-    auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    const auto result = [&]() -> IR::UAny {
        // Pick the opcode whose width matches the operand type.
        switch (a.GetType()) {
        case IR::Type::U8:
            return Inst<U8>(Opcode::SignedSaturatedAdd8, a, b);
        case IR::Type::U16:
            return Inst<U16>(Opcode::SignedSaturatedAdd16, a, b);
        case IR::Type::U32:
            return Inst<U32>(Opcode::SignedSaturatedAdd32, a, b);
        case IR::Type::U64:
            return Inst<U64>(Opcode::SignedSaturatedAdd64, a, b);
        default:
            // NOTE(review): any other type yields a default-constructed UAny with no
            // diagnostic, and that value is then passed to GetOverflowFromOp below.
            // Consider asserting here.
            return IR::UAny{};
        }
    }();
    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
    return {result, overflow};
 }
 // Emits a signed saturating subtract for two operands of the same type (U8, U16, U32 or U64).
 // Returns the result together with a GetOverflowFromOp pseudo-operation built on it.
-ResultAndOverflow<U32> IREmitter::SignedSaturatedSub(const U32& a, const U32& b) {
+ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
-    auto result = Inst<U32>(Opcode::SignedSaturatedSub, a, b);
+    ASSERT(a.GetType() == b.GetType());
-    auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    const auto result = [&]() -> IR::UAny {
        // Pick the opcode whose width matches the operand type.
        switch (a.GetType()) {
        case IR::Type::U8:
            return Inst<U8>(Opcode::SignedSaturatedSub8, a, b);
        case IR::Type::U16:
            return Inst<U16>(Opcode::SignedSaturatedSub16, a, b);
        case IR::Type::U32:
            return Inst<U32>(Opcode::SignedSaturatedSub32, a, b);
        case IR::Type::U64:
            return Inst<U64>(Opcode::SignedSaturatedSub64, a, b);
        default:
            // NOTE(review): any other type yields a default-constructed UAny with no
            // diagnostic, and that value is then passed to GetOverflowFromOp below.
            // Consider asserting here.
            return IR::UAny{};
        }
    }();
    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
    return {result, overflow};
 }
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -142,8 +142,8 @@ public:
    U32U64 MinSigned(const U32U64& a, const U32U64& b);
    U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
    // Signed saturating add/subtract. The definitions in ir_emitter.cpp assert that
    // both operands have the same type and dispatch on U8/U16/U32/U64.
-    ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
+    ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
-    ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
+    ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
    ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
    ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -77,9 +77,9 @@ A64OPC(GetTPIDRRO,                              T::U64,
 OPCODE(PushRSB,                                 T::Void,        T::U64                                          )
 // Pseudo-operation, handled specially at final emit
 // NOTE(review): the operand type changes from T::U32 to T::Opaque, presumably so these
 // pseudo-operations can attach to results of any width - confirm.
-OPCODE(GetCarryFromOp,                          T::U1,          T::U32                                          )
+OPCODE(GetCarryFromOp,                          T::U1,          T::Opaque                                       )
-OPCODE(GetOverflowFromOp,                       T::U1,          T::U32                                          )
+OPCODE(GetOverflowFromOp,                       T::U1,          T::Opaque                                       )
-OPCODE(GetGEFromOp,                             T::U32,         T::U32                                          )
+OPCODE(GetGEFromOp,                             T::U32,         T::Opaque                                       )
 OPCODE(GetNZCVFromOp,                           T::NZCVFlags,   T::Opaque                                       )
 OPCODE(NZCVFromPackedFlags,                     T::NZCVFlags,   T::U32                                          )
@ -155,10 +155,16 @@ OPCODE(MinUnsigned32,                           T::U32,         T::U32,
 OPCODE(MinUnsigned64,                           T::U64,         T::U64,         T::U64                          )
 // Saturated instructions
 // SignedSaturatedAdd/Sub have one opcode per operand width (8, 16, 32 and 64 bits);
 // each takes two operands of that width and produces a result of the same width.
-OPCODE(SignedSaturatedAdd,                      T::U32,         T::U32,         T::U32                          )
+OPCODE(SignedSaturatedAdd8,                     T::U8,          T::U8,          T::U8                           )
-OPCODE(SignedSaturatedSub,                      T::U32,         T::U32,         T::U32                          )
+OPCODE(SignedSaturatedAdd16,                    T::U16,         T::U16,         T::U16                          )
-OPCODE(UnsignedSaturation,                      T::U32,         T::U32,         T::U8                           )
+OPCODE(SignedSaturatedAdd32,                    T::U32,         T::U32,         T::U32                          )
 OPCODE(SignedSaturatedAdd64,                    T::U64,         T::U64,         T::U64                          )
 OPCODE(SignedSaturatedSub8,                     T::U8,          T::U8,          T::U8                           )
 OPCODE(SignedSaturatedSub16,                    T::U16,         T::U16,         T::U16                          )
 OPCODE(SignedSaturatedSub32,                    T::U32,         T::U32,         T::U32                          )
 OPCODE(SignedSaturatedSub64,                    T::U64,         T::U64,         T::U64                          )
 OPCODE(SignedSaturation,                        T::U32,         T::U32,         T::U8                           )
 OPCODE(UnsignedSaturation,                      T::U32,         T::U32,         T::U8                           )
 // Packed instructions
 OPCODE(PackedAddU8,                             T::U32,         T::U32,         T::U32                          )