IR: Add masked shift IR instructions

Also use these in the A64 frontend to avoid the need to mask the shift amount.
MerryMage 2020-04-05 17:40:24 +01:00
parent bd88286b21
commit 09d3c77d74
5 changed files with 161 additions and 8 deletions
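
For orientation: a "masked" shift computes the value shifted by (amount mod datasize), which is exactly the AArch64 LSLV/LSRV/ASRV/RORV behaviour. A minimal C++ sketch of the intended semantics (illustrative only; these helper names are not dynarmic APIs):

#include <cstdint>

// Reference semantics of the new masked shift ops (sketch).
// A plain C++ shift is undefined for amounts >= the bit width,
// so the amount is masked first, exactly as AArch64 specifies.
inline uint32_t MaskedShl32(uint32_t value, uint32_t amount) {
    return value << (amount & 31u);
}

inline uint64_t MaskedRor64(uint64_t value, uint64_t amount) {
    const uint64_t n = amount & 63u;
    if (n == 0) return value;  // avoid the undefined value << 64
    return (value >> n) | (value << (64 - n));
}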

View file

@@ -4,6 +4,9 @@
 * General Public License version 2 or any later version.
 */

#include <cstddef>       // std::nullptr_t
#include <type_traits>   // std::is_same_v

#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
#include "common/assert.h"
@@ -725,6 +728,112 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.DefineValue(inst, result);
}

template <typename ShiftFT, typename BMI2FT>
static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShiftFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto& operand_arg = args[0];
    auto& shift_arg = args[1];

    if (shift_arg.IsImmediate()) {
        // Immediate shift amount: apply the mask at compile time.
        const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
        const u8 shift = shift_arg.GetImmediateU8();

        shift_fn(result, shift & 0x1F);

        ctx.reg_alloc.DefineValue(inst, result);
        return;
    }

    if constexpr (!std::is_same_v<BMI2FT, std::nullptr_t>) {
        if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
            // BMI2 shlx/shrx/sarx take the count in any register and mask it
            // to the operand width in hardware.
            const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
            const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32();
            const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(shift_arg).cvt32();

            (code.*bmi2_shift)(result, operand, shift);

            ctx.reg_alloc.DefineValue(inst, result);
            return;
        }
    }

    // Generic x64 shift and rotate instructions take the count in CL and
    // mask it to 5 bits in hardware, so no explicit AND is needed here.
    ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();

    shift_fn(result, code.cl);

    ctx.reg_alloc.DefineValue(inst, result);
}

template <typename ShiftFT, typename BMI2FT>
static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, ShiftFT shift_fn, [[maybe_unused]] BMI2FT bmi2_shift) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto& operand_arg = args[0];
    auto& shift_arg = args[1];

    if (shift_arg.IsImmediate()) {
        // Immediate shift amount: apply the mask at compile time.
        const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
        const u8 shift = shift_arg.GetImmediateU8();

        shift_fn(result, shift & 0x3F);

        ctx.reg_alloc.DefineValue(inst, result);
        return;
    }

    if constexpr (!std::is_same_v<BMI2FT, std::nullptr_t>) {
        if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
            // As in the 32-bit case, BMI2 shifts mask the count in hardware.
            const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
            const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg);
            const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(shift_arg);

            (code.*bmi2_shift)(result, operand, shift);

            ctx.reg_alloc.DefineValue(inst, result);
            return;
        }
    }

    // Generic x64 64-bit shifts and rotates mask the CL count to 6 bits.
    ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);

    shift_fn(result, code.cl);

    ctx.reg_alloc.DefineValue(inst, result);
}

void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
}

void EmitX64::EmitLogicalShiftLeftMasked64(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shl(result, shift); }, &Xbyak::CodeGenerator::shlx);
}

void EmitX64::EmitLogicalShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
}

void EmitX64::EmitLogicalShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.shr(result, shift); }, &Xbyak::CodeGenerator::shrx);
}

void EmitX64::EmitArithmeticShiftRightMasked32(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
}

void EmitX64::EmitArithmeticShiftRightMasked64(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.sar(result, shift); }, &Xbyak::CodeGenerator::sarx);
}

void EmitX64::EmitRotateRightMasked32(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift32(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
}

void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) {
    EmitMaskedShift64(code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr);
}

static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) {
    if (carry_in.IsImmediate()) {
        return carry_out ? reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1};

View file

@@ -8,17 +8,13 @@
 namespace Dynarmic::A64 {

-static IR::U8 SanitizeShiftAmount(TranslatorVisitor& v, size_t datasize, const IR::U32U64& amount) {
-    return v.ir.LeastSignificantByte(v.ir.And(amount, v.I(datasize, datasize - 1)));
-}
-
 bool TranslatorVisitor::LSLV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const size_t datasize = sf ? 64 : 32;

     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);

-    const IR::U32U64 result = ir.LogicalShiftLeft(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.LogicalShiftLeftMasked(operand, shift_amount);

     X(datasize, Rd, result);
     return true;
 }
@@ -30,7 +26,7 @@ bool TranslatorVisitor::LSRV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);

-    const IR::U32U64 result = ir.LogicalShiftRight(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.LogicalShiftRightMasked(operand, shift_amount);

     X(datasize, Rd, result);
     return true;
@@ -42,7 +38,7 @@ bool TranslatorVisitor::ASRV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);

-    const IR::U32U64 result = ir.ArithmeticShiftRight(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.ArithmeticShiftRightMasked(operand, shift_amount);

     X(datasize, Rd, result);
     return true;
@@ -54,7 +50,7 @@ bool TranslatorVisitor::RORV(bool sf, Reg Rm, Reg Rn, Reg Rd) {
     const IR::U32U64 operand = X(datasize, Rn);
     const IR::U32U64 shift_amount = X(datasize, Rm);

-    const IR::U32U64 result = ir.RotateRight(operand, SanitizeShiftAmount(*this, datasize, shift_amount));
+    const IR::U32U64 result = ir.RotateRightMasked(operand, shift_amount);

     X(datasize, Rd, result);
     return true;
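
The explicit And with datasize - 1 can be dropped because the masked opcodes carry the mod-datasize semantics themselves; AArch64 defines these instructions to shift by Rm mod datasize. A self-contained illustration of the behaviour being encoded (not dynarmic code):

#include <cassert>
#include <cstdint>

int main() {
    // LSLV at datasize == 32: a shift amount of 33 acts as 33 % 32 == 1.
    const uint32_t operand = 0x80000001u;
    const uint32_t amount = 33;
    assert((operand << (amount & 31u)) == 0x00000002u);
    return 0;
}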

View file

@@ -185,6 +185,42 @@ U32U64 IREmitter::RotateRight(const U32U64& value_in, const U8& shift_amount) {
    }
}

U32U64 IREmitter::LogicalShiftLeftMasked(const U32U64& value_in, const U32U64& shift_amount) {
    ASSERT(value_in.GetType() == shift_amount.GetType());
    if (value_in.GetType() == Type::U32) {
        return Inst<U32>(Opcode::LogicalShiftLeftMasked32, value_in, shift_amount);
    } else {
        return Inst<U64>(Opcode::LogicalShiftLeftMasked64, value_in, shift_amount);
    }
}

U32U64 IREmitter::LogicalShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount) {
    ASSERT(value_in.GetType() == shift_amount.GetType());
    if (value_in.GetType() == Type::U32) {
        return Inst<U32>(Opcode::LogicalShiftRightMasked32, value_in, shift_amount);
    } else {
        return Inst<U64>(Opcode::LogicalShiftRightMasked64, value_in, shift_amount);
    }
}

U32U64 IREmitter::ArithmeticShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount) {
    ASSERT(value_in.GetType() == shift_amount.GetType());
    if (value_in.GetType() == Type::U32) {
        return Inst<U32>(Opcode::ArithmeticShiftRightMasked32, value_in, shift_amount);
    } else {
        return Inst<U64>(Opcode::ArithmeticShiftRightMasked64, value_in, shift_amount);
    }
}

U32U64 IREmitter::RotateRightMasked(const U32U64& value_in, const U32U64& shift_amount) {
    ASSERT(value_in.GetType() == shift_amount.GetType());
    if (value_in.GetType() == Type::U32) {
        return Inst<U32>(Opcode::RotateRightMasked32, value_in, shift_amount);
    } else {
        return Inst<U64>(Opcode::RotateRightMasked64, value_in, shift_amount);
    }
}

ResultAndCarryAndOverflow<U32> IREmitter::AddWithCarry(const U32& a, const U32& b, const U1& carry_in) {
    const auto result = Inst<U32>(Opcode::Add32, a, b, carry_in);
    const auto carry_out = Inst<U1>(Opcode::GetCarryFromOp, result);

View file

@@ -114,6 +114,10 @@ public:
    U32U64 LogicalShiftRight(const U32U64& value_in, const U8& shift_amount);
    U32U64 ArithmeticShiftRight(const U32U64& value_in, const U8& shift_amount);
    U32U64 RotateRight(const U32U64& value_in, const U8& shift_amount);
    U32U64 LogicalShiftLeftMasked(const U32U64& value_in, const U32U64& shift_amount);
    U32U64 LogicalShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount);
    U32U64 ArithmeticShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount);
    U32U64 RotateRightMasked(const U32U64& value_in, const U32U64& shift_amount);
    ResultAndCarry<U32> RotateRightExtended(const U32& value_in, const U1& carry_in);
    ResultAndCarryAndOverflow<U32> AddWithCarry(const U32& a, const U32& b, const U1& carry_in);
    ResultAndCarryAndOverflow<U32> SubWithCarry(const U32& a, const U32& b, const U1& carry_in);

View file

@@ -113,6 +113,14 @@ OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
OPCODE(RotateRight32, U32, U32, U8, U1 )
OPCODE(RotateRight64, U64, U64, U8 )
OPCODE(RotateRightExtended, U32, U32, U1 )
OPCODE(LogicalShiftLeftMasked32, U32, U32, U32 )
OPCODE(LogicalShiftLeftMasked64, U64, U64, U64 )
OPCODE(LogicalShiftRightMasked32, U32, U32, U32 )
OPCODE(LogicalShiftRightMasked64, U64, U64, U64 )
OPCODE(ArithmeticShiftRightMasked32, U32, U32, U32 )
OPCODE(ArithmeticShiftRightMasked64, U64, U64, U64 )
OPCODE(RotateRightMasked32, U32, U32, U32 )
OPCODE(RotateRightMasked64, U64, U64, U64 )
OPCODE(Add32, U32, U32, U32, U1 )
OPCODE(Add64, U64, U64, U64, U1 )
OPCODE(Sub32, U32, U32, U32, U1 )
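
opcodes.inc is an X-macro table: each consumer defines OPCODE to extract the fields it needs before including the file, so the eight new rows above are the whole registration. A minimal consumer sketch (the enum shape is illustrative; dynarmic's actual consumers may differ):

// Build an opcode enumeration from the table (X-macro technique).
enum class Opcode {
#define OPCODE(name, type, ...) name,
#include "opcodes.inc"  // path assumed relative to this file
#undef OPCODE
    NumOpcodes,
};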