ir: Add opcodes for unsigned saturating add and subtract
This commit is contained in:
parent
c41b5a3492
commit
acbaf04fef
4 changed files with 158 additions and 37 deletions
|
@ -77,6 +77,45 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
template<Op op, size_t size>
|
||||
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]);
|
||||
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]);
|
||||
|
||||
op_result.setBit(size);
|
||||
addend.setBit(size);
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
code.add(op_result, addend);
|
||||
} else {
|
||||
code.sub(op_result, addend);
|
||||
}
|
||||
|
||||
constexpr u64 boundary = op == Op::Add ? std::numeric_limits<mp::unsigned_integer_of_size<size>>::max()
|
||||
: 0;
|
||||
code.mov(addend, boundary);
|
||||
|
||||
if constexpr (size < 64) {
|
||||
code.cmovae(addend.cvt32(), op_result.cvt32());
|
||||
} else {
|
||||
code.cmovae(addend, op_result);
|
||||
}
|
||||
|
||||
if (overflow_inst) {
|
||||
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
|
||||
code.setb(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, addend);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
@ -111,36 +150,6 @@ void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
|||
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Emits x64 code that clamps the 32-bit value in args[0], interpreted as signed,
// to the range [0, 2^N - 1], where N is the 8-bit immediate in args[1]
// (asserted to be at most 31).
//
// If `inst` has an associated GetOverflowFromOp pseudo-operation, it is defined
// as 1 when the input lay outside that range and 0 otherwise, and then erased.
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);
    const u32 upper_bound = (1u << N) - 1;

    Xbyak::Reg32 clamped = ctx.reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 input = ctx.reg_alloc.UseGpr(args[0]).cvt32();
    Xbyak::Reg32 zero_then_flag = ctx.reg_alloc.ScratchGpr().cvt32();

    // Zero the helper register first: XOR clobbers the flags, so it has to
    // precede the CMP whose flags drive every conditional instruction below.
    code.xor_(zero_then_flag, zero_then_flag);
    code.cmp(input, upper_bound);

    // Start from the upper bound (MOV leaves the flags alone) ...
    code.mov(clamped, upper_bound);
    // ... drop to 0 when input <= bound as signed (covers negative inputs) ...
    code.cmovle(clamped, zero_then_flag);
    // ... and take the input itself when input <= bound as unsigned (in range).
    code.cmovbe(clamped, input);

    if (overflow_inst) {
        // Unsigned "above" is true for negative inputs and for inputs past the bound.
        code.seta(zero_then_flag.cvt8());

        ctx.reg_alloc.DefineValue(overflow_inst, zero_then_flag);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, clamped);
}
|
||||
|
||||
void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
|
@ -190,4 +199,66 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
// Unsigned saturating add, 8-bit operands: forwards to EmitUnsignedSaturatedOp
// instantiated with Op::Add and a width of 8.
// NOTE(review): presumably invoked for the like-named IR opcode - confirm against the dispatcher.
void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Unsigned saturating add, 16-bit operands: same helper with a width of 16.
void EmitX64::EmitUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Unsigned saturating add, 32-bit operands: same helper with a width of 32.
void EmitX64::EmitUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Unsigned saturating add, 64-bit operands: same helper with a width of 64.
void EmitX64::EmitUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Unsigned saturating subtract, 8-bit operands: forwards to EmitUnsignedSaturatedOp
// instantiated with Op::Sub and a width of 8.
// NOTE(review): presumably invoked for the like-named IR opcode - confirm against the dispatcher.
void EmitX64::EmitUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Unsigned saturating subtract, 16-bit operands: same helper with a width of 16.
void EmitX64::EmitUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Unsigned saturating subtract, 32-bit operands: same helper with a width of 32.
void EmitX64::EmitUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Unsigned saturating subtract, 64-bit operands: same helper with a width of 64.
void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Emits x64 code that clamps the 32-bit value in args[0], interpreted as signed,
// to the range [0, 2^N - 1], where N is the 8-bit immediate in args[1]
// (asserted to be at most 31).
//
// If `inst` has an associated GetOverflowFromOp pseudo-operation, it is defined
// as 1 when the input lay outside that range and 0 otherwise, and then erased.
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N <= 31);
    const u32 upper_bound = (1u << N) - 1;

    Xbyak::Reg32 clamped = ctx.reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 input = ctx.reg_alloc.UseGpr(args[0]).cvt32();
    Xbyak::Reg32 zero_then_flag = ctx.reg_alloc.ScratchGpr().cvt32();

    // Zero the helper register first: XOR clobbers the flags, so it has to
    // precede the CMP whose flags drive every conditional instruction below.
    code.xor_(zero_then_flag, zero_then_flag);
    code.cmp(input, upper_bound);

    // Start from the upper bound (MOV leaves the flags alone) ...
    code.mov(clamped, upper_bound);
    // ... drop to 0 when input <= bound as signed (covers negative inputs) ...
    code.cmovle(clamped, zero_then_flag);
    // ... and take the input itself when input <= bound as unsigned (in range).
    code.cmovbe(clamped, input);

    if (overflow_inst) {
        // Unsigned "above" is true for negative inputs and for inputs past the bound.
        code.seta(zero_then_flag.cvt8());

        ctx.reg_alloc.DefineValue(overflow_inst, zero_then_flag);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, clamped);
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
|
|
|
@ -521,16 +521,56 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny&
|
|||
return {result, overflow};
|
||||
}
|
||||
|
||||
ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
|
||||
ASSERT(bit_size_to_saturate_to <= 31);
|
||||
auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
|
||||
// Builds a SignedSaturation instruction on `a`, with the target bit width passed
// as an 8-bit immediate, plus the GetOverflowFromOp pseudo-operation reading it.
//
// bit_size_to_saturate_to must lie in [1, 32] (asserted).
// Returns the saturated value paired with its overflow flag.
ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);

    const auto saturated = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
    const auto overflowed = Inst<U1>(Opcode::GetOverflowFromOp, saturated);

    return {saturated, overflowed};
}
|
||||
|
||||
ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
|
||||
ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
|
||||
auto result = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
|
||||
// Builds the width-specific UnsignedSaturatedAdd{8,16,32,64} instruction for
// a + b, chosen by the type of `a`, plus the GetOverflowFromOp pseudo-operation
// reading it.
//
// `a` and `b` must have the same type (asserted). For any type other than
// U8/U16/U32/U64 no add instruction is built and the result stays an empty UAny.
// Returns the sum paired with its overflow flag.
ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());

    IR::UAny sum{};
    switch (a.GetType()) {
    case IR::Type::U8:
        sum = Inst<U8>(Opcode::UnsignedSaturatedAdd8, a, b);
        break;
    case IR::Type::U16:
        sum = Inst<U16>(Opcode::UnsignedSaturatedAdd16, a, b);
        break;
    case IR::Type::U32:
        sum = Inst<U32>(Opcode::UnsignedSaturatedAdd32, a, b);
        break;
    case IR::Type::U64:
        sum = Inst<U64>(Opcode::UnsignedSaturatedAdd64, a, b);
        break;
    default:
        // Unsupported operand type: keep the empty value.
        break;
    }

    const auto overflowed = Inst<U1>(Opcode::GetOverflowFromOp, sum);
    return {sum, overflowed};
}
|
||||
|
||||
// Builds the width-specific UnsignedSaturatedSub{8,16,32,64} instruction for
// a - b, chosen by the type of `a`, plus the GetOverflowFromOp pseudo-operation
// reading it.
//
// `a` and `b` must have the same type (asserted). For any type other than
// U8/U16/U32/U64 no subtract instruction is built and the result stays an empty UAny.
// Returns the difference paired with its overflow flag.
ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());

    IR::UAny difference{};
    switch (a.GetType()) {
    case IR::Type::U8:
        difference = Inst<U8>(Opcode::UnsignedSaturatedSub8, a, b);
        break;
    case IR::Type::U16:
        difference = Inst<U16>(Opcode::UnsignedSaturatedSub16, a, b);
        break;
    case IR::Type::U32:
        difference = Inst<U32>(Opcode::UnsignedSaturatedSub32, a, b);
        break;
    case IR::Type::U64:
        difference = Inst<U64>(Opcode::UnsignedSaturatedSub64, a, b);
        break;
    default:
        // Unsupported operand type: keep the empty value.
        break;
    }

    const auto overflowed = Inst<U1>(Opcode::GetOverflowFromOp, difference);
    return {difference, overflowed};
}
|
||||
|
||||
// Builds an UnsignedSaturation instruction on `a`, with the target bit width
// passed as an 8-bit immediate, plus the GetOverflowFromOp pseudo-operation
// reading it.
//
// bit_size_to_saturate_to must be at most 31 (asserted).
// Returns the saturated value paired with its overflow flag.
ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
    ASSERT(bit_size_to_saturate_to <= 31);

    const auto saturated = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
    const auto overflowed = Inst<U1>(Opcode::GetOverflowFromOp, saturated);

    return {saturated, overflowed};
}
|
||||
|
|
|
@ -144,8 +144,10 @@ public:
|
|||
|
||||
// Saturating arithmetic builders. Each returns the operation's result together
// with an overflow flag.
// NOTE(review): the signed add/sub definitions are not visible here; presumably
// they mirror the unsigned variants below - confirm.
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
|
||||
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
|
||||
// Saturates `a` to an unsigned value of the given bit width; the width must be <= 31.
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
||||
// Saturates `a` to a signed value of the given bit width; the width must be in [1, 32].
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
||||
// Unsigned saturating a + b / a - b; `a` and `b` must have the same type
// (U8, U16, U32 or U64 select the matching opcode).
ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
|
||||
ResultAndOverflow<UAny> UnsignedSaturatedSub(const UAny& a, const UAny& b);
|
||||
// Saturates `a` to an unsigned value of the given bit width; the width must be <= 31.
ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
||||
|
||||
ResultAndGE<U32> PackedAddU8(const U32& a, const U32& b);
|
||||
ResultAndGE<U32> PackedAddS8(const U32& a, const U32& b);
|
||||
|
|
|
@ -164,6 +164,14 @@ OPCODE(SignedSaturatedSub16, T::U16, T::U16,
|
|||
// NOTE(review): columns look like (opcode name, result type, argument types...) -
// confirm against the OPCODE macro definition.
OPCODE(SignedSaturatedSub32, T::U32, T::U32, T::U32 )
|
||||
OPCODE(SignedSaturatedSub64, T::U64, T::U64, T::U64 )
|
||||
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
|
||||
// Unsigned saturating add/subtract: one opcode per operand width, with all
// three type columns equal to that width.
OPCODE(UnsignedSaturatedAdd8, T::U8, T::U8, T::U8 )
|
||||
OPCODE(UnsignedSaturatedAdd16, T::U16, T::U16, T::U16 )
|
||||
OPCODE(UnsignedSaturatedAdd32, T::U32, T::U32, T::U32 )
|
||||
OPCODE(UnsignedSaturatedAdd64, T::U64, T::U64, T::U64 )
|
||||
OPCODE(UnsignedSaturatedSub8, T::U8, T::U8, T::U8 )
|
||||
OPCODE(UnsignedSaturatedSub16, T::U16, T::U16, T::U16 )
|
||||
OPCODE(UnsignedSaturatedSub32, T::U32, T::U32, T::U32 )
|
||||
OPCODE(UnsignedSaturatedSub64, T::U64, T::U64, T::U64 )
|
||||
// Saturation of a U32 value to a bit width supplied as a U8 operand.
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
|
||||
|
||||
// Packed instructions
|
||||
|
|
Loading…
Reference in a new issue