IR/saturation: Revamp saturated add/sub IR instructions
parent 2d0bf7ca9b
commit babfb7d7b8

14 changed files with 292 additions and 297 deletions
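For orientation (not part of the diff itself), a minimal sketch of how the two flavours of saturated helpers are intended to be used after this revamp, pieced together from the IREmitter declarations and translator hunks below. It assumes the surrounding TranslatorVisitor context, and the local name "sum" is purely illustrative:

    // A32 QADD-style translation: the new *WithFlag helpers return a
    // ResultAndOverflow<U32>, so the frontend folds the overflow bit into
    // the ARM Q flag itself.
    const auto result = ir.SignedSaturatedAddWithFlag(ir.GetRegister(m), ir.GetRegister(n));
    ir.SetRegister(d, result.result);
    ir.OrQFlag(result.overflow);

    // A64 scalar SQADD-style translation: the plain sized helpers now return
    // only the value (UAny); the backend ORs the saturation flag into FPSR.QC
    // itself, so the old ir.OrQC(...) call disappears.
    const IR::UAny sum = ir.SignedSaturatedAdd(operand1, operand2);
    V_scalar(esize, Vd, sum);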
@@ -186,14 +186,6 @@ void EmitIR<IR::Opcode::A64SetFPSR>(oaknut::CodeGenerator& code, EmitContext& ct
     ASSERT_FALSE("Unimplemented");
 }
 
-template<>
-void EmitIR<IR::Opcode::A64OrQC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    (void)code;
-    (void)ctx;
-    (void)inst;
-    ASSERT_FALSE("Unimplemented");
-}
-
 template<>
 void EmitIR<IR::Opcode::A64SetPC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     (void)code;
@@ -18,6 +18,88 @@ namespace Dynarmic::Backend::Arm64 {
 
 using namespace oaknut::util;
 
+template<>
+void EmitIR<IR::Opcode::SignedSaturatedAddWithFlag32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::SignedSaturatedSubWithFlag32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    (void)code;
+    (void)ctx;
+    (void)inst;
+    ASSERT_FALSE("Unimplemented");
+}
+
+template<>
+void EmitIR<IR::Opcode::SignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const size_t N = args[1].GetImmediateU8();
+    ASSERT(N >= 1 && N <= 32);
+
+    if (N == 32) {
+        ctx.reg_alloc.DefineAsExisting(inst, args[0]);
+        if (overflow_inst) {
+            auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
+            RegAlloc::Realize(Woverflow);
+            code.MOV(*Woverflow, WZR);
+        }
+        return;
+    }
+
+    const u32 positive_saturated_value = (1u << (N - 1)) - 1;
+    const u32 negative_saturated_value = ~u32{0} << (N - 1);
+
+    auto Woperand = ctx.reg_alloc.ReadW(args[0]);
+    auto Wresult = ctx.reg_alloc.WriteW(inst);
+    RegAlloc::Realize(Woperand, Wresult);
+    ctx.reg_alloc.SpillFlags();
+
+    code.MOV(Wscratch0, negative_saturated_value);
+    code.MOV(Wscratch1, positive_saturated_value);
+    code.CMP(*Woperand, Wscratch0);
+    code.CSEL(Wresult, Woperand, Wscratch0, GT);
+    code.CMP(*Woperand, Wscratch1);
+    code.CSEL(Wresult, Wresult, Wscratch1, LT);
+
+    if (overflow_inst) {
+        auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
+        RegAlloc::Realize(Woverflow);
+        code.CMP(*Wresult, Woperand);
+        code.CSET(Woverflow, NE);
+    }
+}
+
+template<>
+void EmitIR<IR::Opcode::UnsignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
+    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Wresult = ctx.reg_alloc.WriteW(inst);
+    auto Woperand = ctx.reg_alloc.ReadW(args[0]);
+    RegAlloc::Realize(Wresult, Woperand);
+    ctx.reg_alloc.SpillFlags();
+
+    const size_t N = args[1].GetImmediateU8();
+    ASSERT(N <= 31);
+    const u32 saturated_value = (1u << N) - 1;
+
+    code.MOV(Wscratch0, saturated_value);
+    code.CMP(*Woperand, Wscratch0);
+    code.CSEL(Wresult, Woperand, Wscratch0, LS);
+
+    if (overflow_inst) {
+        auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
+        RegAlloc::Realize(Woverflow);
+        code.CSET(Woverflow, HI);
+    }
+}
+
 template<>
 void EmitIR<IR::Opcode::SignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     (void)code;
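As a quick check on the bounds computed by the SignedSaturation/UnsignedSaturation emitters above: for N = 8, positive_saturated_value = (1u << 7) - 1 = 0x7F (127) and negative_saturated_value = ~u32{0} << 7 = 0xFFFFFF80 (-128 as a signed 32-bit value), while the unsigned bound saturated_value = (1u << 8) - 1 = 0xFF (255).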
@@ -98,47 +180,6 @@ void EmitIR<IR::Opcode::SignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitC
     ASSERT_FALSE("Unimplemented");
 }
 
-template<>
-void EmitIR<IR::Opcode::SignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const size_t N = args[1].GetImmediateU8();
-    ASSERT(N >= 1 && N <= 32);
-
-    if (N == 32) {
-        ctx.reg_alloc.DefineAsExisting(inst, args[0]);
-        if (overflow_inst) {
-            auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
-            RegAlloc::Realize(Woverflow);
-            code.MOV(*Woverflow, WZR);
-        }
-        return;
-    }
-
-    const u32 positive_saturated_value = (1u << (N - 1)) - 1;
-    const u32 negative_saturated_value = ~u32{0} << (N - 1);
-
-    auto Woperand = ctx.reg_alloc.ReadW(args[0]);
-    auto Wresult = ctx.reg_alloc.WriteW(inst);
-    RegAlloc::Realize(Woperand, Wresult);
-    ctx.reg_alloc.SpillFlags();
-
-    code.MOV(Wscratch0, negative_saturated_value);
-    code.MOV(Wscratch1, positive_saturated_value);
-    code.CMP(*Woperand, Wscratch0);
-    code.CSEL(Wresult, Woperand, Wscratch0, GT);
-    code.CMP(*Woperand, Wscratch1);
-    code.CSEL(Wresult, Wresult, Wscratch1, LT);
-
-    if (overflow_inst) {
-        auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
-        RegAlloc::Realize(Woverflow);
-        code.CMP(*Wresult, Woperand);
-        code.CSET(Woverflow, NE);
-    }
-}
-
 template<>
 void EmitIR<IR::Opcode::UnsignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
     (void)code;

@@ -203,29 +244,4 @@ void EmitIR<IR::Opcode::UnsignedSaturatedSub64>(oaknut::CodeGenerator& code, Emi
     ASSERT_FALSE("Unimplemented");
 }
 
-template<>
-void EmitIR<IR::Opcode::UnsignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    auto Wresult = ctx.reg_alloc.WriteW(inst);
-    auto Woperand = ctx.reg_alloc.ReadW(args[0]);
-    RegAlloc::Realize(Wresult, Woperand);
-    ctx.reg_alloc.SpillFlags();
-
-    const size_t N = args[1].GetImmediateU8();
-    ASSERT(N <= 31);
-    const u32 saturated_value = (1u << N) - 1;
-
-    code.MOV(Wscratch0, saturated_value);
-    code.CMP(*Woperand, Wscratch0);
-    code.CSEL(Wresult, Woperand, Wscratch0, LS);
-
-    if (overflow_inst) {
-        auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
-        RegAlloc::Realize(Woverflow);
-        code.CSET(Woverflow, HI);
-    }
-}
-
 } // namespace Dynarmic::Backend::Arm64
@@ -452,22 +452,6 @@ void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
     code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
 }
 
-void A64EmitX64::EmitA64OrQC(A64EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    if (args[0].IsImmediate()) {
-        if (!args[0].GetImmediateU1()) {
-            return;
-        }
-
-        code.mov(code.byte[code.r15 + offsetof(A64JitState, fpsr_qc)], u8(1));
-        return;
-    }
-
-    const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
-    code.or_(code.byte[code.r15 + offsetof(A64JitState, fpsr_qc)], to_store);
-}
-
 void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const auto addr = qword[r15 + offsetof(A64JitState, pc)];
@@ -27,10 +27,8 @@ enum class Op {
     Sub,
 };
 
-template<Op op, size_t size>
+template<Op op, size_t size, bool has_overflow_inst = false>
 void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
@@ -62,11 +60,14 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
         code.cmovo(result, overflow);
     }
 
-    if (overflow_inst) {
-        code.seto(overflow.cvt8());
-
+    code.seto(overflow.cvt8());
+    if constexpr (has_overflow_inst) {
+        if (const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)) {
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
         ctx.EraseInstruction(overflow_inst);
+        }
+    } else {
+        code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
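For context, the new has_overflow_inst template parameter selects between the two consumers of EmitSignedSaturatedOp. The instantiations shown here are the ones that appear further down in this diff, reproduced only for orientation:

    // WithFlag32 opcodes: materialise the overflow bit as an IR value
    // (read back via GetOverflowFromOp by the A32 frontend).
    EmitSignedSaturatedOp<Op::Add, 32, true>(code, ctx, inst);  // EmitSignedSaturatedAddWithFlag32
    // Plain sized opcodes: the helper ORs the flag straight into FPSR.QC.
    EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);         // EmitSignedSaturatedAdd8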
@@ -74,8 +75,6 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
 
 template<Op op, size_t size>
 void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
@@ -95,109 +94,21 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
         code.cmovae(addend, op_result);
     }
 
-    if (overflow_inst) {
-        const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
-        code.setb(overflow.cvt8());
+    const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
+    code.setb(overflow.cvt8());
+    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
 
-        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
-        ctx.EraseInstruction(overflow_inst);
-    }
-
     ctx.reg_alloc.DefineValue(inst, addend);
 }
 
 } // anonymous namespace
 
-void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
+void EmitX64::EmitSignedSaturatedAddWithFlag32(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 32, true>(code, ctx, inst);
 }
 
-void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
-}
-
-void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
-}
-
-void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
-}
-
-void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
-    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
-    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
-
-    code.movsx(x, x.cvt16());
-    code.movsx(y, y.cvt16());
-
-    code.imul(x, y);
-    code.lea(y, ptr[x.cvt64() + x.cvt64()]);
-    code.mov(tmp, x);
-    code.shr(tmp, 15);
-    code.xor_(y, x);
-    code.mov(y, 0x7FFF);
-    code.cmovns(y, tmp);
-
-    if (overflow_inst) {
-        code.sets(tmp.cvt8());
-
-        ctx.reg_alloc.DefineValue(overflow_inst, tmp);
-        ctx.EraseInstruction(overflow_inst);
-    }
-
-    ctx.reg_alloc.DefineValue(inst, y);
-}
-
-void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
-    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
-    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
-    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
-
-    code.movsxd(x, x.cvt32());
-    code.movsxd(y, y.cvt32());
-
-    code.imul(x, y);
-    code.lea(y, ptr[x + x]);
-    code.mov(tmp, x);
-    code.shr(tmp, 31);
-    code.xor_(y, x);
-    code.mov(y.cvt32(), 0x7FFFFFFF);
-    code.cmovns(y.cvt32(), tmp.cvt32());
-
-    if (overflow_inst) {
-        code.sets(tmp.cvt8());
-
-        ctx.reg_alloc.DefineValue(overflow_inst, tmp);
-        ctx.EraseInstruction(overflow_inst);
-    }
-
-    ctx.reg_alloc.DefineValue(inst, y);
-}
-
-void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
-}
-
-void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
-}
-
-void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
-}
-
-void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
-    EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
+void EmitX64::EmitSignedSaturatedSubWithFlag32(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Sub, 32, true>(code, ctx, inst);
 }
 
 void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
@@ -250,6 +161,116 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
+    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const size_t N = args[1].GetImmediateU8();
+    ASSERT(N <= 31);
+
+    const u32 saturated_value = (1u << N) - 1;
+
+    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
+
+    // Pseudocode: result = clamp(reg_a, 0, saturated_value);
+    code.xor_(overflow, overflow);
+    code.cmp(reg_a, saturated_value);
+    code.mov(result, saturated_value);
+    code.cmovle(result, overflow);
+    code.cmovbe(result, reg_a);
+
+    if (overflow_inst) {
+        code.seta(overflow.cvt8());
+
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
+}
+
+void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
+}
+
+void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
+}
+
+void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+
+    code.movsx(x, x.cvt16());
+    code.movsx(y, y.cvt16());
+
+    code.imul(x, y);
+    code.lea(y, ptr[x.cvt64() + x.cvt64()]);
+    code.mov(tmp, x);
+    code.shr(tmp, 15);
+    code.xor_(y, x);
+    code.mov(y, 0x7FFF);
+    code.cmovns(y, tmp);
+
+    code.sets(tmp.cvt8());
+    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
+
+    ctx.reg_alloc.DefineValue(inst, y);
+}
+
+void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
+    const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
+    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+
+    code.movsxd(x, x.cvt32());
+    code.movsxd(y, y.cvt32());
+
+    code.imul(x, y);
+    code.lea(y, ptr[x + x]);
+    code.mov(tmp, x);
+    code.shr(tmp, 31);
+    code.xor_(y, x);
+    code.mov(y.cvt32(), 0x7FFFFFFF);
+    code.cmovns(y.cvt32(), tmp.cvt32());
+
+    code.sets(tmp.cvt8());
+    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
+
+    ctx.reg_alloc.DefineValue(inst, y);
+}
+
+void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
+}
+
+void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
+}
+
+void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
+}
+
+void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
+    EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
+}
+
 void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
     EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
 }
@@ -282,34 +303,4 @@ void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
     EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
 }
 
-void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
-    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
-
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const size_t N = args[1].GetImmediateU8();
-    ASSERT(N <= 31);
-
-    const u32 saturated_value = (1u << N) - 1;
-
-    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-    const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
-
-    // Pseudocode: result = clamp(reg_a, 0, saturated_value);
-    code.xor_(overflow, overflow);
-    code.cmp(reg_a, saturated_value);
-    code.mov(result, saturated_value);
-    code.cmovle(result, overflow);
-    code.cmovbe(result, reg_a);
-
-    if (overflow_inst) {
-        code.seta(overflow.cvt8());
-
-        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
-        ctx.EraseInstruction(overflow_inst);
-    }
-
-    ctx.reg_alloc.DefineValue(inst, result);
-}
-
 } // namespace Dynarmic::Backend::X64
@@ -116,7 +116,7 @@ bool TranslatorVisitor::arm_QADD(Cond cond, Reg n, Reg d, Reg m) {
 
     const auto a = ir.GetRegister(m);
     const auto b = ir.GetRegister(n);
-    const auto result = ir.SignedSaturatedAdd(a, b);
+    const auto result = ir.SignedSaturatedAddWithFlag(a, b);
 
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);

@@ -135,7 +135,7 @@ bool TranslatorVisitor::arm_QSUB(Cond cond, Reg n, Reg d, Reg m) {
 
     const auto a = ir.GetRegister(m);
     const auto b = ir.GetRegister(n);
-    const auto result = ir.SignedSaturatedSub(a, b);
+    const auto result = ir.SignedSaturatedSubWithFlag(a, b);
 
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);

@@ -154,10 +154,10 @@ bool TranslatorVisitor::arm_QDADD(Cond cond, Reg n, Reg d, Reg m) {
 
     const auto a = ir.GetRegister(m);
     const auto b = ir.GetRegister(n);
-    const auto doubled = ir.SignedSaturatedAdd(b, b);
+    const auto doubled = ir.SignedSaturatedAddWithFlag(b, b);
     ir.OrQFlag(doubled.overflow);
 
-    const auto result = ir.SignedSaturatedAdd(a, doubled.result);
+    const auto result = ir.SignedSaturatedAddWithFlag(a, doubled.result);
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);
     return true;

@@ -175,10 +175,10 @@ bool TranslatorVisitor::arm_QDSUB(Cond cond, Reg n, Reg d, Reg m) {
 
     const auto a = ir.GetRegister(m);
     const auto b = ir.GetRegister(n);
-    const auto doubled = ir.SignedSaturatedAdd(b, b);
+    const auto doubled = ir.SignedSaturatedAddWithFlag(b, b);
     ir.OrQFlag(doubled.overflow);
 
-    const auto result = ir.SignedSaturatedSub(a, doubled.result);
+    const auto result = ir.SignedSaturatedSubWithFlag(a, doubled.result);
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);
     return true;

@@ -26,7 +26,7 @@ bool TranslatorVisitor::thumb32_QADD(Reg n, Reg d, Reg m) {
 
     const auto reg_m = ir.GetRegister(m);
     const auto reg_n = ir.GetRegister(n);
-    const auto result = ir.SignedSaturatedAdd(reg_m, reg_n);
+    const auto result = ir.SignedSaturatedAddWithFlag(reg_m, reg_n);
 
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);

@@ -40,10 +40,10 @@ bool TranslatorVisitor::thumb32_QDADD(Reg n, Reg d, Reg m) {
 
     const auto reg_m = ir.GetRegister(m);
     const auto reg_n = ir.GetRegister(n);
-    const auto doubled_n = ir.SignedSaturatedAdd(reg_n, reg_n);
+    const auto doubled_n = ir.SignedSaturatedAddWithFlag(reg_n, reg_n);
     ir.OrQFlag(doubled_n.overflow);
 
-    const auto result = ir.SignedSaturatedAdd(reg_m, doubled_n.result);
+    const auto result = ir.SignedSaturatedAddWithFlag(reg_m, doubled_n.result);
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);
     return true;

@@ -56,10 +56,10 @@ bool TranslatorVisitor::thumb32_QDSUB(Reg n, Reg d, Reg m) {
 
     const auto reg_m = ir.GetRegister(m);
     const auto reg_n = ir.GetRegister(n);
-    const auto doubled_n = ir.SignedSaturatedAdd(reg_n, reg_n);
+    const auto doubled_n = ir.SignedSaturatedAddWithFlag(reg_n, reg_n);
     ir.OrQFlag(doubled_n.overflow);
 
-    const auto result = ir.SignedSaturatedSub(reg_m, doubled_n.result);
+    const auto result = ir.SignedSaturatedSubWithFlag(reg_m, doubled_n.result);
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);
     return true;

@@ -72,7 +72,7 @@ bool TranslatorVisitor::thumb32_QSUB(Reg n, Reg d, Reg m) {
 
     const auto reg_m = ir.GetRegister(m);
     const auto reg_n = ir.GetRegister(n);
-    const auto result = ir.SignedSaturatedSub(reg_m, reg_n);
+    const auto result = ir.SignedSaturatedSubWithFlag(reg_m, reg_n);
 
     ir.SetRegister(d, result.result);
     ir.OrQFlag(result.overflow);
@@ -42,10 +42,6 @@ void IREmitter::SetNZCV(const IR::NZCV& nzcv) {
     Inst(Opcode::A64SetNZCV, nzcv);
 }
 
-void IREmitter::OrQC(const IR::U1& value) {
-    Inst(Opcode::A64OrQC, value);
-}
-
 void IREmitter::CallSupervisor(u32 imm) {
     Inst(Opcode::A64CallSupervisor, Imm32(imm));
 }

@@ -39,7 +39,6 @@ public:
     IR::U32 GetNZCVRaw();
     void SetNZCVRaw(IR::U32 value);
     void SetNZCV(const IR::NZCV& nzcv);
-    void OrQC(const IR::U1& value);
 
     void CallSupervisor(u32 imm);
     void ExceptionRaised(Exception exception);
@@ -131,8 +131,7 @@ bool TranslatorVisitor::SQADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::UAny operand1 = V_scalar(esize, Vn);
     const IR::UAny operand2 = V_scalar(esize, Vm);
     const auto result = ir.SignedSaturatedAdd(operand1, operand2);
-    ir.OrQC(result.overflow);
-    V_scalar(esize, Vd, result.result);
+    V_scalar(esize, Vd, result);
     return true;
 }
 

@@ -146,10 +145,7 @@ bool TranslatorVisitor::SQDMULH_vec_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::UAny operand1 = V_scalar(esize, Vn);
     const IR::UAny operand2 = V_scalar(esize, Vm);
     const auto result = ir.SignedSaturatedDoublingMultiplyReturnHigh(operand1, operand2);
-
-    ir.OrQC(result.overflow);
-
-    V_scalar(esize, Vd, result.result);
+    V_scalar(esize, Vd, result);
     return true;
 }
 

@@ -175,8 +171,7 @@ bool TranslatorVisitor::SQSUB_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::UAny operand1 = V_scalar(esize, Vn);
     const IR::UAny operand2 = V_scalar(esize, Vm);
     const auto result = ir.SignedSaturatedSub(operand1, operand2);
-    ir.OrQC(result.overflow);
-    V_scalar(esize, Vd, result.result);
+    V_scalar(esize, Vd, result);
     return true;
 }
 

@@ -186,8 +181,7 @@ bool TranslatorVisitor::UQADD_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::UAny operand1 = V_scalar(esize, Vn);
     const IR::UAny operand2 = V_scalar(esize, Vm);
     const auto result = ir.UnsignedSaturatedAdd(operand1, operand2);
-    ir.OrQC(result.overflow);
-    V_scalar(esize, Vd, result.result);
+    V_scalar(esize, Vd, result);
     return true;
 }
 

@@ -197,8 +191,7 @@ bool TranslatorVisitor::UQSUB_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::UAny operand1 = V_scalar(esize, Vn);
     const IR::UAny operand2 = V_scalar(esize, Vm);
     const auto result = ir.UnsignedSaturatedSub(operand1, operand2);
-    ir.OrQC(result.overflow);
-    V_scalar(esize, Vd, result.result);
+    V_scalar(esize, Vd, result);
     return true;
 }
 

@@ -127,10 +127,7 @@ bool TranslatorVisitor::SQDMULH_elt_1(Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vm
     const IR::UAny operand1 = V_scalar(esize, Vn);
     const IR::UAny operand2 = ir.VectorGetElement(esize, V(128, Vm), index);
     const auto result = ir.SignedSaturatedDoublingMultiplyReturnHigh(operand1, operand2);
-
-    ir.OrQC(result.overflow);
-
-    V_scalar(esize, Vd, result.result);
+    V_scalar(esize, Vd, result);
     return true;
 }
 
@@ -525,7 +525,33 @@ U32U64 IREmitter::MinUnsigned(const U32U64& a, const U32U64& b) {
     return Inst<U64>(Opcode::MinUnsigned64, a, b);
 }
 
-ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
+ResultAndOverflow<U32> IREmitter::SignedSaturatedAddWithFlag(const U32& a, const U32& b) {
+    const auto result = Inst<U32>(Opcode::SignedSaturatedAddWithFlag32, a, b);
+    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    return {result, overflow};
+}
+
+ResultAndOverflow<U32> IREmitter::SignedSaturatedSubWithFlag(const U32& a, const U32& b) {
+    const auto result = Inst<U32>(Opcode::SignedSaturatedSubWithFlag32, a, b);
+    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    return {result, overflow};
+}
+
+ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
+    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
+    const auto result = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
+    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    return {result, overflow};
+}
+
+ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
+    ASSERT(bit_size_to_saturate_to <= 31);
+    const auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
+    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
+    return {result, overflow};
+}
+
+UAny IREmitter::SignedSaturatedAdd(const UAny& a, const UAny& b) {
     ASSERT(a.GetType() == b.GetType());
     const auto result = [&]() -> IR::UAny {
         switch (a.GetType()) {

@@ -541,11 +567,10 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny&
             return IR::UAny{};
         }
     }();
-    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
-    return {result, overflow};
+    return result;
 }
 
-ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
+UAny IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
     ASSERT(a.GetType() == b.GetType());
     const auto result = [&]() -> IR::UAny {
         switch (a.GetType()) {

@@ -557,12 +582,10 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(con
             UNREACHABLE();
         }
     }();
-
-    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
-    return {result, overflow};
+    return result;
 }
 
-ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
+UAny IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
     ASSERT(a.GetType() == b.GetType());
     const auto result = [&]() -> IR::UAny {
         switch (a.GetType()) {

@@ -578,18 +601,10 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny&
             return IR::UAny{};
         }
     }();
-    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
-    return {result, overflow};
+    return result;
 }
 
-ResultAndOverflow<U32> IREmitter::SignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
-    ASSERT(bit_size_to_saturate_to >= 1 && bit_size_to_saturate_to <= 32);
-    const auto result = Inst<U32>(Opcode::SignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
-    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
-    return {result, overflow};
-}
-
-ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) {
+UAny IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAny& b) {
     ASSERT(a.GetType() == b.GetType());
     const auto result = [&]() -> IR::UAny {
         switch (a.GetType()) {

@@ -605,11 +620,10 @@ ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedAdd(const UAny& a, const UAn
             return IR::UAny{};
         }
     }();
-    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
-    return {result, overflow};
+    return result;
 }
 
-ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) {
+UAny IREmitter::UnsignedSaturatedSub(const UAny& a, const UAny& b) {
     ASSERT(a.GetType() == b.GetType());
     const auto result = [&]() -> IR::UAny {
         switch (a.GetType()) {

@@ -625,15 +639,7 @@ ResultAndOverflow<UAny> IREmitter::UnsignedSaturatedSub(const UAny& a, const UAn
             return IR::UAny{};
         }
     }();
-    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
-    return {result, overflow};
-}
-
-ResultAndOverflow<U32> IREmitter::UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to) {
-    ASSERT(bit_size_to_saturate_to <= 31);
-    const auto result = Inst<U32>(Opcode::UnsignedSaturation, a, Imm8(static_cast<u8>(bit_size_to_saturate_to)));
-    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
-    return {result, overflow};
+    return result;
 }
 
 U128 IREmitter::VectorSignedSaturatedAdd(size_t esize, const U128& a, const U128& b) {
@@ -150,14 +150,17 @@ public:
     U32U64 MinSigned(const U32U64& a, const U32U64& b);
     U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
 
-    ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
-    ResultAndOverflow<UAny> SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b);
-    ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
+    ResultAndOverflow<U32> SignedSaturatedAddWithFlag(const U32& a, const U32& b);
+    ResultAndOverflow<U32> SignedSaturatedSubWithFlag(const U32& a, const U32& b);
     ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
-    ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
-    ResultAndOverflow<UAny> UnsignedSaturatedSub(const UAny& a, const UAny& b);
     ResultAndOverflow<U32> UnsignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
 
+    UAny SignedSaturatedAdd(const UAny& a, const UAny& b);
+    UAny SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b);
+    UAny SignedSaturatedSub(const UAny& a, const UAny& b);
+    UAny UnsignedSaturatedAdd(const UAny& a, const UAny& b);
+    UAny UnsignedSaturatedSub(const UAny& a, const UAny& b);
+
     U128 VectorSignedSaturatedAdd(size_t esize, const U128& a, const U128& b);
     U128 VectorSignedSaturatedSub(size_t esize, const U128& a, const U128& b);
     U128 VectorUnsignedSaturatedAdd(size_t esize, const U128& a, const U128& b);
@@ -418,7 +418,24 @@ bool Inst::ReadsFromFPSRCumulativeSaturationBit() const {
 
 bool Inst::WritesToFPSRCumulativeSaturationBit() const {
     switch (op) {
-    case Opcode::A64OrQC:
+    case Opcode::SignedSaturatedAdd8:
+    case Opcode::SignedSaturatedAdd16:
+    case Opcode::SignedSaturatedAdd32:
+    case Opcode::SignedSaturatedAdd64:
+    case Opcode::SignedSaturatedDoublingMultiplyReturnHigh16:
+    case Opcode::SignedSaturatedDoublingMultiplyReturnHigh32:
+    case Opcode::SignedSaturatedSub8:
+    case Opcode::SignedSaturatedSub16:
+    case Opcode::SignedSaturatedSub32:
+    case Opcode::SignedSaturatedSub64:
+    case Opcode::UnsignedSaturatedAdd8:
+    case Opcode::UnsignedSaturatedAdd16:
+    case Opcode::UnsignedSaturatedAdd32:
+    case Opcode::UnsignedSaturatedAdd64:
+    case Opcode::UnsignedSaturatedSub8:
+    case Opcode::UnsignedSaturatedSub16:
+    case Opcode::UnsignedSaturatedSub32:
+    case Opcode::UnsignedSaturatedSub64:
     case Opcode::VectorSignedSaturatedAbs8:
     case Opcode::VectorSignedSaturatedAbs16:
     case Opcode::VectorSignedSaturatedAbs32:
@@ -63,7 +63,6 @@ A64OPC(SetQ, Void, A64V
 A64OPC(SetSP, Void, U64 )
 A64OPC(SetFPCR, Void, U32 )
 A64OPC(SetFPSR, Void, U32 )
-A64OPC(OrQC, Void, U1 )
 A64OPC(SetPC, Void, U64 )
 A64OPC(CallSupervisor, Void, U32 )
 A64OPC(ExceptionRaised, Void, U64, U64 )

@@ -178,6 +177,10 @@ OPCODE(MinUnsigned32, U32, U32,
 OPCODE(MinUnsigned64, U64, U64, U64 )
 
 // Saturated instructions
+OPCODE(SignedSaturatedAddWithFlag32, U32, U32, U32 )
+OPCODE(SignedSaturatedSubWithFlag32, U32, U32, U32 )
+OPCODE(SignedSaturation, U32, U32, U8 )
+OPCODE(UnsignedSaturation, U32, U32, U8 )
 OPCODE(SignedSaturatedAdd8, U8, U8, U8 )
 OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
 OPCODE(SignedSaturatedAdd32, U32, U32, U32 )

@@ -188,7 +191,6 @@ OPCODE(SignedSaturatedSub8, U8, U8,
 OPCODE(SignedSaturatedSub16, U16, U16, U16 )
 OPCODE(SignedSaturatedSub32, U32, U32, U32 )
 OPCODE(SignedSaturatedSub64, U64, U64, U64 )
-OPCODE(SignedSaturation, U32, U32, U8 )
 OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 )
 OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 )
 OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 )

@@ -197,7 +199,6 @@ OPCODE(UnsignedSaturatedSub8, U8, U8,
 OPCODE(UnsignedSaturatedSub16, U16, U16, U16 )
 OPCODE(UnsignedSaturatedSub32, U32, U32, U32 )
 OPCODE(UnsignedSaturatedSub64, U64, U64, U64 )
-OPCODE(UnsignedSaturation, U32, U32, U8 )
 
 // Vector saturated instructions
 OPCODE(VectorSignedSaturatedAdd8, U128, U128, U128 )