ir: Add opcodes for unsigned saturating add and subtract

This commit is contained in:
Lioncash 2018-08-13 11:45:35 -04:00 committed by MerryMage
parent c41b5a3492
commit acbaf04fef
4 changed files with 158 additions and 37 deletions

View file

@ -77,6 +77,45 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
ctx.reg_alloc.DefineValue(inst, result);
}
template<Op op, size_t size>
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]);
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]);
op_result.setBit(size);
addend.setBit(size);
if constexpr (op == Op::Add) {
code.add(op_result, addend);
} else {
code.sub(op_result, addend);
}
constexpr u64 boundary = op == Op::Add ? std::numeric_limits<mp::unsigned_integer_of_size<size>>::max()
: 0;
code.mov(addend, boundary);
if constexpr (size < 64) {
code.cmovae(addend.cvt32(), op_result.cvt32());
} else {
code.cmovae(addend, op_result);
}
if (overflow_inst) {
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
code.setb(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, addend);
}
} // anonymous namespace
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
@ -111,36 +150,6 @@ void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
size_t N = args[1].GetImmediateU8();
ASSERT(N <= 31);
u32 saturated_value = (1u << N) - 1;
Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code.xor_(overflow, overflow);
code.cmp(reg_a, saturated_value);
code.mov(result, saturated_value);
code.cmovle(result, overflow);
code.cmovbe(result, reg_a);
if (overflow_inst) {
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
@ -190,4 +199,66 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
size_t N = args[1].GetImmediateU8();
ASSERT(N <= 31);
u32 saturated_value = (1u << N) - 1;
Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code.xor_(overflow, overflow);
code.cmp(reg_a, saturated_value);
code.mov(result, saturated_value);
code.cmovle(result, overflow);
code.cmovbe(result, reg_a);
if (overflow_inst) {
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst);
}
ctx.reg_alloc.DefineValue(inst, result);
}
} // namespace Dynarmic::BackendX64

View file

@ -521,16 +521,56 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny&
return {result, overflow};
}
ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
ASSERT(bit_size_to_saturate_to <= 31);
auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
auto result = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
}
ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
auto result = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
case IR::Type::U8:
return Inst<U8>(Opcode::UnsignedSaturatedAdd8, a, b);
case IR::Type::U16:
return Inst<U16>(Opcode::UnsignedSaturatedAdd16, a, b);
case IR::Type::U32:
return Inst<U32>(Opcode::UnsignedSaturatedAdd32, a, b);
case IR::Type::U64:
return Inst<U64>(Opcode::UnsignedSaturatedAdd64, a, b);
default:
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
}
ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny {
switch (a.GetType()) {
case IR::Type::U8:
return Inst<U8>(Opcode::UnsignedSaturatedSub8, a, b);
case IR::Type::U16:
return Inst<U16>(Opcode::UnsignedSaturatedSub16, a, b);
case IR::Type::U32:
return Inst<U32>(Opcode::UnsignedSaturatedSub32, a, b);
case IR::Type::U64:
return Inst<U64>(Opcode::UnsignedSaturatedSub64, a, b);
default:
return IR::UAny{};
}
}();
const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
}
ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
ASSERT(bit_size_to_saturate_to <= 31);
auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
return {result, overflow};
}

View file

@ -144,8 +144,10 @@ public:
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> UnsignedSaturatedSub(const UAny& a, const UAny& b);
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
ResultAndGE<U32> PackedAddU8(const U32& a, const U32& b);
ResultAndGE<U32> PackedAddS8(const U32& a, const U32& b);

View file

@ -164,6 +164,14 @@ OPCODE(SignedSaturatedSub16, T::U16, T::U16,
OPCODE(SignedSaturatedSub32, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub64, T::U64, T::U64, T::U64 )
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(UnsignedSaturatedAdd8, T::U8, T::U8, T::U8 )
OPCODE(UnsignedSaturatedAdd16, T::U16, T::U16, T::U16 )
OPCODE(UnsignedSaturatedAdd32, T::U32, T::U32, T::U32 )
OPCODE(UnsignedSaturatedAdd64, T::U64, T::U64, T::U64 )
OPCODE(UnsignedSaturatedSub8, T::U8, T::U8, T::U8 )
OPCODE(UnsignedSaturatedSub16, T::U16, T::U16, T::U16 )
OPCODE(UnsignedSaturatedSub32, T::U32, T::U32, T::U32 )
OPCODE(UnsignedSaturatedSub64, T::U64, T::U64, T::U64 )
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
// Packed instructions