IR: Add AndNot{32,64} IR instruction

Also includes BMI1 acceleration (the `andn` instruction) in the x64 backend, used when the host CPU supports it.
This commit is contained in:
Wunkolo 2021-06-25 00:03:06 -07:00 committed by merry
parent 49d00634f9
commit 5971361160
14 changed files with 96 additions and 16 deletions

View file

@ -1283,6 +1283,72 @@ void EmitX64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
// Emits x64 code for the AndNot32 IR instruction: result = args[0] & ~args[1] (32-bit).
//
// ctx  - emit context; its register allocator supplies operands and receives the result.
// inst - the IR instruction being lowered; the result register is defined against it.
void EmitX64::EmitAndNot32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Fast path: neither operand is an immediate and the host has BMI1, so a
    // single ANDN produces the result without modifying either source register.
    const bool any_immediate = args[0].IsImmediate() || args[1].IsImmediate();
    if (!any_immediate && code.HasHostFeature(HostFeature::BMI1)) {
        const Xbyak::Reg32 lhs = ctx.reg_alloc.UseGpr(args[0]).cvt32();
        const Xbyak::Reg32 rhs = ctx.reg_alloc.UseGpr(args[1]).cvt32();
        const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();

        // ANDN complements its first source operand: dest = ~rhs & lhs.
        code.andn(dest, rhs, lhs);

        ctx.reg_alloc.DefineValue(inst, dest);
        return;
    }

    // Fallback, step 1: materialise ~args[1] in a register we are allowed to modify.
    const Xbyak::Reg32 result = [&] {
        if (!args[1].IsImmediate()) {
            Xbyak::Reg32 inverted = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
            code.not_(inverted);
            return inverted;
        }
        // The complement of an immediate is computed here, at emit time.
        Xbyak::Reg32 inverted = ctx.reg_alloc.ScratchGpr().cvt32();
        code.mov(inverted, u32(~args[1].GetImmediateU32()));
        return inverted;
    }();

    // Fallback, step 2: AND args[0] into the inverted value.
    if (!args[0].IsImmediate()) {
        OpArg lhs = ctx.reg_alloc.UseOpArg(args[0]);
        lhs.setBit(32);
        code.and_(result, *lhs);
    } else {
        const u32 lhs_imm = args[0].GetImmediateU32();
        code.and_(result, lhs_imm);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Emits x64 code for the AndNot64 IR instruction: result = args[0] & ~args[1] (64-bit).
//
// ctx  - emit context; its register allocator supplies operands and receives the result.
// inst - the IR instruction being lowered; the result register is defined against it.
void EmitX64::EmitAndNot64(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Fast path: neither operand is an immediate and the host has BMI1, so a
    // single ANDN produces the result without modifying either source register.
    const bool any_immediate = args[0].IsImmediate() || args[1].IsImmediate();
    if (!any_immediate && code.HasHostFeature(HostFeature::BMI1)) {
        const Xbyak::Reg64 lhs = ctx.reg_alloc.UseGpr(args[0]);
        const Xbyak::Reg64 rhs = ctx.reg_alloc.UseGpr(args[1]);
        const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr();

        // ANDN complements its first source operand: dest = ~rhs & lhs.
        code.andn(dest, rhs, lhs);

        ctx.reg_alloc.DefineValue(inst, dest);
        return;
    }

    // Fallback, step 1: materialise ~args[1] in a register we are allowed to modify.
    const Xbyak::Reg64 result = [&] {
        if (!args[1].IsImmediate()) {
            Xbyak::Reg64 inverted = ctx.reg_alloc.UseScratchGpr(args[1]);
            code.not_(inverted);
            return inverted;
        }
        // The complement of an immediate is computed here, at emit time.
        Xbyak::Reg64 inverted = ctx.reg_alloc.ScratchGpr();
        code.mov(inverted, ~args[1].GetImmediateU64());
        return inverted;
    }();

    // Fallback, step 2: AND args[0] into the inverted value. Only values that
    // fit a signed 32-bit immediate are passed to AND as an immediate; anything
    // else goes through a register/memory operand obtained from the allocator.
    if (!args[0].FitsInImmediateS32()) {
        OpArg lhs = ctx.reg_alloc.UseOpArg(args[0]);
        lhs.setBit(64);
        code.and_(result, *lhs);
    } else {
        const u32 lhs_imm = u32(args[0].GetImmediateS32());
        code.and_(result, lhs_imm);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -250,7 +250,7 @@ bool TranslatorVisitor::arm_BIC_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
}
const auto imm_carry = ArmExpandImm_C(rotate, imm8, ir.GetCFlag());
const auto result = ir.And(ir.GetRegister(n), ir.Not(ir.Imm32(imm_carry.imm32)));
const auto result = ir.AndNot(ir.GetRegister(n), ir.Imm32(imm_carry.imm32));
if (d == Reg::PC) {
if (S) {
// This is UNPREDICTABLE when in user-mode.
@ -280,7 +280,7 @@ bool TranslatorVisitor::arm_BIC_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
const auto carry_in = ir.GetCFlag();
const auto shifted = EmitImmShift(ir.GetRegister(m), shift, imm5, carry_in);
const auto result = ir.And(ir.GetRegister(n), ir.Not(shifted.result));
const auto result = ir.AndNot(ir.GetRegister(n), shifted.result);
if (d == Reg::PC) {
if (S) {
// This is UNPREDICTABLE when in user-mode.
@ -315,7 +315,7 @@ bool TranslatorVisitor::arm_BIC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
const auto shift_n = ir.LeastSignificantByte(ir.GetRegister(s));
const auto carry_in = ir.GetCFlag();
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.And(ir.GetRegister(n), ir.Not(shifted.result));
const auto result = ir.AndNot(ir.GetRegister(n), shifted.result);
ir.SetRegister(d, result);
if (S) {

View file

@ -356,7 +356,7 @@ bool TranslatorVisitor::thumb16_MUL_reg(Reg n, Reg d_m) {
bool TranslatorVisitor::thumb16_BIC_reg(Reg m, Reg d_n) {
const Reg d = d_n;
const Reg n = d_n;
const auto result = ir.And(ir.GetRegister(n), ir.Not(ir.GetRegister(m)));
const auto result = ir.AndNot(ir.GetRegister(n), ir.GetRegister(m));
ir.SetRegister(d, result);
if (!ir.current_location.IT().IsInITBlock()) {

View file

@ -45,7 +45,7 @@ bool TranslatorVisitor::thumb32_BIC_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re
}
const auto imm_carry = ThumbExpandImm_C(i, imm3, imm8, ir.GetCFlag());
const auto result = ir.And(ir.GetRegister(n), ir.Not(ir.Imm32(imm_carry.imm32)));
const auto result = ir.AndNot(ir.GetRegister(n), ir.Imm32(imm_carry.imm32));
ir.SetRegister(d, result);
if (S) {

View file

@ -45,7 +45,7 @@ bool TranslatorVisitor::thumb32_BIC_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2
}
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.And(ir.GetRegister(n), ir.Not(shifted.result));
const auto result = ir.AndNot(ir.GetRegister(n), shifted.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result));

View file

@ -128,8 +128,8 @@ bool TranslatorVisitor::BIC_shift(bool sf, Imm<2> shift, Reg Rm, Imm<6> imm6, Re
const u8 shift_amount = imm6.ZeroExtend<u8>();
const auto operand1 = X(datasize, Rn);
const auto operand2 = ir.Not(ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount)));
const auto result = ir.And(operand1, operand2);
const auto operand2 = ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount));
const auto result = ir.AndNot(operand1, operand2);
X(datasize, Rd, result);
return true;
@ -225,8 +225,8 @@ bool TranslatorVisitor::BICS(bool sf, Imm<2> shift, Reg Rm, Imm<6> imm6, Reg Rn,
const u8 shift_amount = imm6.ZeroExtend<u8>();
const auto operand1 = X(datasize, Rn);
const auto operand2 = ir.Not(ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount)));
const auto result = ir.And(operand1, operand2);
const auto operand2 = ShiftReg(datasize, Rm, shift, ir.Imm8(shift_amount));
const auto result = ir.AndNot(operand1, operand2);
ir.SetNZCV(ir.NZCVFrom(result));
X(datasize, Rd, result);

View file

@ -65,7 +65,7 @@ bool SM3TT2(TranslatorVisitor& v, Vec Vm, Imm<2> imm2, Vec Vn, Vec Vd, SM3TTVari
return v.ir.Eor(after_low_d, v.ir.Eor(top_d, before_top_d));
}
const IR::U32 tmp1 = v.ir.And(top_d, before_top_d);
const IR::U32 tmp2 = v.ir.And(v.ir.Not(top_d), after_low_d);
const IR::U32 tmp2 = v.ir.AndNot(after_low_d, top_d);
return v.ir.Or(tmp1, tmp2);
}();
const IR::U32 final_tt2 = v.ir.Add(tt2, v.ir.Add(low_d, v.ir.Add(top_n, wj)));

View file

@ -156,7 +156,7 @@ bool ShiftAndInsert(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec
return v.ir.LogicalShiftLeft(operand1, v.ir.Imm8(shift_amount));
}();
const IR::U64 result = v.ir.Or(v.ir.And(operand2, v.ir.Not(v.ir.Imm64(mask))), shifted);
const IR::U64 result = v.ir.Or(v.ir.AndNot(operand2, v.ir.Imm64(mask)), shifted);
v.V_scalar(esize, Vd, result);
return true;
}

View file

@ -50,7 +50,7 @@ IR::U128 SHA512Hash(IREmitter& ir, Vec Vm, Vec Vn, Vec Vd, SHA512HashPart part)
const IR::U64 tmp1 = ir.And(a, b);
if (part == SHA512HashPart::Part1) {
const IR::U64 tmp2 = ir.And(ir.Not(a), c);
const IR::U64 tmp2 = ir.AndNot(c, a);
return ir.Eor(tmp1, tmp2);
}

View file

@ -15,7 +15,7 @@ bool TranslatorVisitor::AXFlag() {
const IR::U32 v = ir.And(nzcv, ir.Imm32(0x10000000));
const IR::U32 new_z = ir.Or(ir.LogicalShiftLeft(v, ir.Imm8(2)), z);
const IR::U32 new_c = ir.And(ir.And(c, ir.Not(ir.LogicalShiftLeft(v, ir.Imm8(1)))), ir.Imm32(0x20000000));
const IR::U32 new_c = ir.And(ir.AndNot(c, ir.LogicalShiftLeft(v, ir.Imm8(1))), ir.Imm32(0x20000000));
ir.SetNZCVRaw(ir.Or(new_z, new_c));
return true;
@ -27,8 +27,8 @@ bool TranslatorVisitor::XAFlag() {
const IR::U32 z = ir.And(nzcv, ir.Imm32(0x40000000));
const IR::U32 c = ir.And(nzcv, ir.Imm32(0x20000000));
const IR::U32 not_z = ir.And(ir.Not(z), ir.Imm32(0x40000000));
const IR::U32 not_c = ir.And(ir.Not(c), ir.Imm32(0x20000000));
const IR::U32 not_z = ir.AndNot(ir.Imm32(0x40000000), z);
const IR::U32 not_c = ir.AndNot(ir.Imm32(0x20000000), c);
const IR::U32 new_n = ir.And(ir.LogicalShiftLeft(not_c, ir.Imm8(2)),
ir.LogicalShiftLeft(not_z, ir.Imm8(1)));

View file

@ -317,6 +317,15 @@ U32U64 IREmitter::And(const U32U64& a, const U32U64& b) {
}
}
// Emits an AndNot IR instruction on two operands of the same width.
// Both operands must have the same type (asserted); a U32 pair produces
// Opcode::AndNot32, any other pair produces Opcode::AndNot64.
U32U64 IREmitter::AndNot(const U32U64& a, const U32U64& b) {
    ASSERT(a.GetType() == b.GetType());

    const bool is_32_bit = a.GetType() == Type::U32;
    if (!is_32_bit) {
        return Inst<U64>(Opcode::AndNot64, a, b);
    }
    return Inst<U32>(Opcode::AndNot32, a, b);
}
U32U64 IREmitter::Eor(const U32U64& a, const U32U64& b) {
ASSERT(a.GetType() == b.GetType());
if (a.GetType() == Type::U32) {

View file

@ -144,6 +144,7 @@ public:
U32U64 UnsignedDiv(const U32U64& a, const U32U64& b);
U32U64 SignedDiv(const U32U64& a, const U32U64& b);
U32U64 And(const U32U64& a, const U32U64& b);
U32U64 AndNot(const U32U64& a, const U32U64& b);
U32U64 Eor(const U32U64& a, const U32U64& b);
U32U64 Or(const U32U64& a, const U32U64& b);
U32U64 Not(const U32U64& a);

View file

@ -566,6 +566,8 @@ bool Inst::MayGetNZCVFromOp() const {
case Opcode::Sub64:
case Opcode::And32:
case Opcode::And64:
case Opcode::AndNot32:
case Opcode::AndNot64:
case Opcode::Eor32:
case Opcode::Eor64:
case Opcode::Or32:

View file

@ -143,6 +143,8 @@ OPCODE(SignedDiv32, U32, U32,
OPCODE(SignedDiv64, U64, U64, U64 )
OPCODE(And32, U32, U32, U32 )
OPCODE(And64, U64, U64, U64 )
OPCODE(AndNot32, U32, U32, U32 )
OPCODE(AndNot64, U64, U64, U64 )
OPCODE(Eor32, U32, U32, U32 )
OPCODE(Eor64, U64, U64, U64 )
OPCODE(Or32, U32, U32, U32 )