IR: Generalise SignedSaturated{Add,Sub} to support more bitwidths
This commit is contained in:
parent
71db0e67ae
commit
10e196480f
4 changed files with 119 additions and 43 deletions
|
@ -4,11 +4,14 @@
|
||||||
* General Public License version 2 or any later version.
|
* General Public License version 2 or any later version.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
#include "backend_x64/block_of_code.h"
|
#include "backend_x64/block_of_code.h"
|
||||||
#include "backend_x64/emit_x64.h"
|
#include "backend_x64/emit_x64.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/bit_util.h"
|
#include "common/bit_util.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "common/mp/integer.h"
|
||||||
#include "frontend/ir/basic_block.h"
|
#include "frontend/ir/basic_block.h"
|
||||||
#include "frontend/ir/microinstruction.h"
|
#include "frontend/ir/microinstruction.h"
|
||||||
#include "frontend/ir/opcodes.h"
|
#include "frontend/ir/opcodes.h"
|
||||||
|
@ -16,22 +19,53 @@
|
||||||
namespace Dynarmic::BackendX64 {
|
namespace Dynarmic::BackendX64 {
|
||||||
|
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
|
namespace mp = Dynarmic::Common::mp;
|
||||||
|
|
||||||
void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
|
namespace {
|
||||||
|
|
||||||
|
enum class Op {
|
||||||
|
Add,
|
||||||
|
Sub,
|
||||||
|
};
|
||||||
|
|
||||||
|
template<Op op, size_t size>
|
||||||
|
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]);
|
||||||
Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
|
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]);
|
||||||
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
code.mov(overflow, result);
|
result.setBit(size);
|
||||||
code.shr(overflow, 31);
|
addend.setBit(size);
|
||||||
code.add(overflow, 0x7FFFFFFF);
|
overflow.setBit(size);
|
||||||
// overflow now contains 0x7FFFFFFF if a was non-negative (sign bit clear), or 0x80000000 if a was negative
|
|
||||||
|
constexpr u64 int_max = static_cast<u64>(std::numeric_limits<mp::signed_integer_of_size<size>>::max());
|
||||||
|
if constexpr (size < 64) {
|
||||||
|
code.xor_(overflow.cvt32(), overflow.cvt32());
|
||||||
|
code.bt(result.cvt32(), size - 1);
|
||||||
|
code.adc(overflow.cvt32(), int_max);
|
||||||
|
} else {
|
||||||
|
code.mov(overflow, int_max);
|
||||||
|
code.bt(result, 63);
|
||||||
|
code.adc(overflow, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// overflow now contains 0x7F... if a was non-negative (sign bit clear), or 0x80... if a was negative
|
||||||
|
|
||||||
|
if constexpr (op == Op::Add) {
|
||||||
code.add(result, addend);
|
code.add(result, addend);
|
||||||
|
} else {
|
||||||
|
code.sub(result, addend);
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (size < 64) {
|
||||||
|
code.cmovo(result.cvt32(), overflow.cvt32());
|
||||||
|
} else {
|
||||||
code.cmovo(result, overflow);
|
code.cmovo(result, overflow);
|
||||||
|
}
|
||||||
|
|
||||||
if (overflow_inst) {
|
if (overflow_inst) {
|
||||||
code.seto(overflow.cvt8());
|
code.seto(overflow.cvt8());
|
||||||
|
@ -43,30 +77,38 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
|
} // anonymous namespace
|
||||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
|
||||||
Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
|
|
||||||
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
|
||||||
|
|
||||||
code.mov(overflow, result);
|
|
||||||
code.shr(overflow, 31);
|
|
||||||
code.add(overflow, 0x7FFFFFFF);
|
|
||||||
// overflow now contains 0x7FFFFFFF if a was non-negative (sign bit clear), or 0x80000000 if a was negative
|
|
||||||
code.sub(result, subend);
|
|
||||||
code.cmovo(result, overflow);
|
|
||||||
|
|
||||||
if (overflow_inst) {
|
|
||||||
code.seto(overflow.cvt8());
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
|
||||||
ctx.EraseInstruction(overflow_inst);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
|
@ -481,15 +481,43 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) {
|
||||||
return Inst<U64>(Opcode::MinUnsigned64, a, b);
|
return Inst<U64>(Opcode::MinUnsigned64, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
|
ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
|
||||||
auto result = Inst<U32>(Opcode::SignedSaturatedAdd, a, b);
|
ASSERT(a.GetType() == b.GetType());
|
||||||
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
|
const auto result = [&]() -> IR::UAny {
|
||||||
|
switch (a.GetType()) {
|
||||||
|
case IR::Type::U8:
|
||||||
|
return Inst<U8>(Opcode::SignedSaturatedAdd8, a, b);
|
||||||
|
case IR::Type::U16:
|
||||||
|
return Inst<U16>(Opcode::SignedSaturatedAdd16, a, b);
|
||||||
|
case IR::Type::U32:
|
||||||
|
return Inst<U32>(Opcode::SignedSaturatedAdd32, a, b);
|
||||||
|
case IR::Type::U64:
|
||||||
|
return Inst<U64>(Opcode::SignedSaturatedAdd64, a, b);
|
||||||
|
default:
|
||||||
|
return IR::UAny{};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
|
||||||
return {result, overflow};
|
return {result, overflow};
|
||||||
}
|
}
|
||||||
|
|
||||||
ResultAndOverflow<U32> IREmitter::SignedSaturatedSub(const U32& a, const U32& b) {
|
ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
|
||||||
auto result = Inst<U32>(Opcode::SignedSaturatedSub, a, b);
|
ASSERT(a.GetType() == b.GetType());
|
||||||
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
|
const auto result = [&]() -> IR::UAny {
|
||||||
|
switch (a.GetType()) {
|
||||||
|
case IR::Type::U8:
|
||||||
|
return Inst<U8>(Opcode::SignedSaturatedSub8, a, b);
|
||||||
|
case IR::Type::U16:
|
||||||
|
return Inst<U16>(Opcode::SignedSaturatedSub16, a, b);
|
||||||
|
case IR::Type::U32:
|
||||||
|
return Inst<U32>(Opcode::SignedSaturatedSub32, a, b);
|
||||||
|
case IR::Type::U64:
|
||||||
|
return Inst<U64>(Opcode::SignedSaturatedSub64, a, b);
|
||||||
|
default:
|
||||||
|
return IR::UAny{};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
|
||||||
return {result, overflow};
|
return {result, overflow};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -142,8 +142,8 @@ public:
|
||||||
U32U64 MinSigned(const U32U64& a, const U32U64& b);
|
U32U64 MinSigned(const U32U64& a, const U32U64& b);
|
||||||
U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
|
U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
|
||||||
|
|
||||||
ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
|
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
|
||||||
ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
|
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
|
||||||
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
||||||
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
||||||
|
|
||||||
|
|
|
@ -77,9 +77,9 @@ A64OPC(GetTPIDRRO, T::U64,
|
||||||
OPCODE(PushRSB, T::Void, T::U64 )
|
OPCODE(PushRSB, T::Void, T::U64 )
|
||||||
|
|
||||||
// Pseudo-operation, handled specially at final emit
|
// Pseudo-operation, handled specially at final emit
|
||||||
OPCODE(GetCarryFromOp, T::U1, T::U32 )
|
OPCODE(GetCarryFromOp, T::U1, T::Opaque )
|
||||||
OPCODE(GetOverflowFromOp, T::U1, T::U32 )
|
OPCODE(GetOverflowFromOp, T::U1, T::Opaque )
|
||||||
OPCODE(GetGEFromOp, T::U32, T::U32 )
|
OPCODE(GetGEFromOp, T::U32, T::Opaque )
|
||||||
OPCODE(GetNZCVFromOp, T::NZCVFlags, T::Opaque )
|
OPCODE(GetNZCVFromOp, T::NZCVFlags, T::Opaque )
|
||||||
|
|
||||||
OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32 )
|
OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32 )
|
||||||
|
@ -155,10 +155,16 @@ OPCODE(MinUnsigned32, T::U32, T::U32,
|
||||||
OPCODE(MinUnsigned64, T::U64, T::U64, T::U64 )
|
OPCODE(MinUnsigned64, T::U64, T::U64, T::U64 )
|
||||||
|
|
||||||
// Saturated instructions
|
// Saturated instructions
|
||||||
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
OPCODE(SignedSaturatedAdd8, T::U8, T::U8, T::U8 )
|
||||||
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
|
OPCODE(SignedSaturatedAdd16, T::U16, T::U16, T::U16 )
|
||||||
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
|
OPCODE(SignedSaturatedAdd32, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(SignedSaturatedAdd64, T::U64, T::U64, T::U64 )
|
||||||
|
OPCODE(SignedSaturatedSub8, T::U8, T::U8, T::U8 )
|
||||||
|
OPCODE(SignedSaturatedSub16, T::U16, T::U16, T::U16 )
|
||||||
|
OPCODE(SignedSaturatedSub32, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(SignedSaturatedSub64, T::U64, T::U64, T::U64 )
|
||||||
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
|
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
|
||||||
|
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
|
||||||
|
|
||||||
// Packed instructions
|
// Packed instructions
|
||||||
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
||||||
|
|
Loading…
Reference in a new issue