reg_alloc: Consider bitwidth of data and registers when emitting instructions
This commit is contained in:
parent
144b629d8a
commit
fff8e019dc
6 changed files with 193 additions and 79 deletions
|
@ -44,7 +44,7 @@ EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
|
|||
|
||||
// Removes `inst` from this context's block instruction list, then calls
// Invalidate() and ClearArgs() on the instruction object itself.
// NOTE(review): both Invalidate() and ClearArgs() are listed here; confirm
// whether both calls are intended or only one of them should remain.
void EmitContext::EraseInstruction(IR::Inst* inst) {
|
||||
block.Instructions().erase(inst);
|
||||
inst->Invalidate();
|
||||
inst->ClearArgs();
|
||||
}
|
||||
|
||||
template <typename JST>
|
||||
|
@ -187,10 +187,10 @@ void EmitX64<JST>::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->shr(result, 32);
|
||||
|
||||
if (carry_inst) {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
Xbyak::Reg64 carry = ctx.reg_alloc.ScratchGpr();
|
||||
code->setc(carry.cvt8());
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -370,8 +370,6 @@ void EmitX64<JST>::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
} else {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
u8 shift = shift_arg.GetImmediateU8();
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -392,8 +390,9 @@ void EmitX64<JST>::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->and_(carry, 1);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
} else {
|
||||
ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -426,8 +425,9 @@ void EmitX64<JST>::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
code->outLocalLabel();
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -503,8 +503,6 @@ void EmitX64<JST>::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
} else {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
u8 shift = shift_arg.GetImmediateU8();
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -524,8 +522,9 @@ void EmitX64<JST>::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->xor_(carry, carry);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
} else {
|
||||
ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -560,8 +559,9 @@ void EmitX64<JST>::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
code->outLocalLabel();
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -635,8 +635,6 @@ void EmitX64<JST>::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst)
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
} else {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
u8 shift = shift_arg.GetImmediateU8();
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -653,8 +651,9 @@ void EmitX64<JST>::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst)
|
|||
code->setc(carry);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
} else {
|
||||
ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -683,8 +682,9 @@ void EmitX64<JST>::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst)
|
|||
|
||||
code->outLocalLabel();
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -748,8 +748,6 @@ void EmitX64<JST>::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
} else {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
|
||||
if (shift_arg.IsImmediate()) {
|
||||
u8 shift = shift_arg.GetImmediateU8();
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -765,8 +763,9 @@ void EmitX64<JST>::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->setc(carry);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
} else {
|
||||
ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
|
||||
|
@ -795,8 +794,9 @@ void EmitX64<JST>::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
code->outLocalLabel();
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -838,11 +838,10 @@ void EmitX64<JST>::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->rcr(result, 1);
|
||||
|
||||
if (carry_inst) {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
|
||||
code->setc(carry);
|
||||
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -912,20 +911,20 @@ static void EmitAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit
|
|||
}
|
||||
|
||||
if (nzcv_inst) {
|
||||
ctx.EraseInstruction(nzcv_inst);
|
||||
code->lahf();
|
||||
code->seto(code->al);
|
||||
ctx.reg_alloc.DefineValue(nzcv_inst, nzcv);
|
||||
ctx.EraseInstruction(nzcv_inst);
|
||||
}
|
||||
if (carry_inst) {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
code->setc(carry);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
}
|
||||
if (overflow_inst) {
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
code->seto(overflow);
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -990,21 +989,21 @@ static void EmitSub(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit
|
|||
}
|
||||
|
||||
if (nzcv_inst) {
|
||||
ctx.EraseInstruction(nzcv_inst);
|
||||
code->cmc();
|
||||
code->lahf();
|
||||
code->seto(code->al);
|
||||
ctx.reg_alloc.DefineValue(nzcv_inst, nzcv);
|
||||
ctx.EraseInstruction(nzcv_inst);
|
||||
}
|
||||
if (carry_inst) {
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
code->setnc(carry);
|
||||
ctx.reg_alloc.DefineValue(carry_inst, carry);
|
||||
ctx.EraseInstruction(carry_inst);
|
||||
}
|
||||
if (overflow_inst) {
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
code->seto(overflow);
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -1345,11 +1344,10 @@ void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->cmovo(result, overflow);
|
||||
|
||||
if (overflow_inst) {
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
|
||||
code->seto(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -1373,11 +1371,10 @@ void EmitX64<JST>::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->cmovo(result, overflow);
|
||||
|
||||
if (overflow_inst) {
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
|
||||
code->seto(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -1405,11 +1402,10 @@ void EmitX64<JST>::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->cmovbe(result, reg_a);
|
||||
|
||||
if (overflow_inst) {
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
|
||||
code->seta(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -1456,11 +1452,10 @@ void EmitX64<JST>::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->cmovbe(result, reg_a);
|
||||
|
||||
if (overflow_inst) {
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
|
||||
code->seta(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -1477,8 +1472,6 @@ void EmitX64<JST>::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->paddb(xmm_a, xmm_b);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
|
@ -1490,6 +1483,7 @@ void EmitX64<JST>::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pxor(xmm_ge, ones);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
|
@ -1504,8 +1498,6 @@ void EmitX64<JST>::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
|
@ -1517,6 +1509,7 @@ void EmitX64<JST>::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pxor(xmm_ge, saturated_sum);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code->paddb(xmm_a, xmm_b);
|
||||
|
@ -1535,8 +1528,6 @@ void EmitX64<JST>::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->paddw(xmm_a, xmm_b);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||
|
@ -1549,6 +1540,7 @@ void EmitX64<JST>::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pxor(xmm_ge, ones);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
} else {
|
||||
Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
|
||||
|
@ -1561,6 +1553,7 @@ void EmitX64<JST>::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1576,8 +1569,6 @@ void EmitX64<JST>::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
|
@ -1589,6 +1580,7 @@ void EmitX64<JST>::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pxor(xmm_ge, saturated_sum);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code->paddw(xmm_a, xmm_b);
|
||||
|
@ -1605,8 +1597,6 @@ void EmitX64<JST>::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
|
|||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code->movdqa(xmm_ge, xmm_a);
|
||||
|
@ -1614,6 +1604,7 @@ void EmitX64<JST>::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pcmpeqb(xmm_ge, xmm_a);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code->psubb(xmm_a, xmm_b);
|
||||
|
@ -1630,8 +1621,6 @@ void EmitX64<JST>::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
|
@ -1643,6 +1632,7 @@ void EmitX64<JST>::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pxor(xmm_ge, saturated_sum);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code->psubb(xmm_a, xmm_b);
|
||||
|
@ -1659,8 +1649,6 @@ void EmitX64<JST>::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
|
@ -1669,6 +1657,7 @@ void EmitX64<JST>::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pcmpeqw(xmm_ge, xmm_a);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
} else {
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||
|
@ -1682,6 +1671,7 @@ void EmitX64<JST>::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pxor(xmm_ge, ones);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1699,8 +1689,6 @@ void EmitX64<JST>::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
|
@ -1712,6 +1700,7 @@ void EmitX64<JST>::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
code->pxor(xmm_ge, saturated_diff);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code->psubw(xmm_a, xmm_b);
|
||||
|
@ -2005,8 +1994,6 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
|
|||
}
|
||||
|
||||
if (ge_inst) {
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
||||
// The reg_b registers are no longer required.
|
||||
Xbyak::Reg32 ge_sum = reg_b_hi;
|
||||
Xbyak::Reg32 ge_diff = reg_b_lo;
|
||||
|
@ -2028,6 +2015,7 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
|
|||
code->or_(ge_sum, ge_diff);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
if (is_halving) {
|
||||
|
|
|
@ -48,6 +48,18 @@ inline bool HostLocIsSpill(HostLoc reg) {
|
|||
return reg >= HostLoc::FirstSpill;
|
||||
}
|
||||
|
||||
// Reports how many bits a host location can hold:
//   general-purpose register -> 64
//   XMM register             -> 128
//   spill slot               -> 64
//   flag                     -> 1
// The classification predicates are consulted in that order; a location that
// matches none of them is treated as unreachable.
inline size_t HostLocBitWidth(HostLoc loc) {
    size_t width = 0;

    if (HostLocIsGPR(loc)) {
        width = 64;
    } else if (HostLocIsXMM(loc)) {
        width = 128;
    } else if (HostLocIsSpill(loc)) {
        width = 64;
    } else if (HostLocIsFlag(loc)) {
        width = 1;
    } else {
        UNREACHABLE();
    }

    return width;
}
|
||||
|
||||
using HostLocList = std::initializer_list<HostLoc>;
|
||||
|
||||
// RSP is preserved for function calls
|
||||
|
|
|
@ -39,6 +39,42 @@ static bool IsSameHostLocClass(HostLoc a, HostLoc b) {
|
|||
|| (HostLocIsSpill(a) && HostLocIsSpill(b));
|
||||
}
|
||||
|
||||
// Minimum number of bits required to represent a type
|
||||
static size_t GetBitWidth(IR::Type type) {
|
||||
switch (type) {
|
||||
case IR::Type::A32Reg:
|
||||
case IR::Type::A32ExtReg:
|
||||
case IR::Type::A64Reg:
|
||||
case IR::Type::A64Vec:
|
||||
case IR::Type::CoprocInfo:
|
||||
case IR::Type::Cond:
|
||||
case IR::Type::Void:
|
||||
ASSERT_MSG(false, "Type %zu cannot be represented at runtime", static_cast<size_t>(type));
|
||||
return 0;
|
||||
case IR::Type::Opaque:
|
||||
ASSERT_MSG(false, "Not a concrete type");
|
||||
return 0;
|
||||
case IR::Type::U1:
|
||||
return 8;
|
||||
case IR::Type::U8:
|
||||
return 8;
|
||||
case IR::Type::U16:
|
||||
return 16;
|
||||
case IR::Type::U32:
|
||||
return 32;
|
||||
case IR::Type::U64:
|
||||
return 64;
|
||||
case IR::Type::F32:
|
||||
return 32;
|
||||
case IR::Type::F64:
|
||||
return 64;
|
||||
case IR::Type::F128:
|
||||
return 128;
|
||||
case IR::Type::NZCVFlags:
|
||||
return 32; // TODO: Update to 16 when flags optimization is done
|
||||
}
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsLocked() const {
|
||||
return is_being_used;
|
||||
}
|
||||
|
@ -51,10 +87,6 @@ bool HostLocInfo::IsLastUse() const {
|
|||
return !is_being_used && current_references == 1 && accumulated_uses + 1 == total_uses;
|
||||
}
|
||||
|
||||
// Reports whether `inst` is one of the instructions stored in this entry's
// `values` list. Comparison is by pointer identity.
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
    for (const IR::Inst* held : values) {
        if (held == inst) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
void HostLocInfo::ReadLock() {
|
||||
ASSERT(!is_scratch);
|
||||
is_being_used = true;
|
||||
|
@ -66,11 +98,6 @@ void HostLocInfo::WriteLock() {
|
|||
is_scratch = true;
|
||||
}
|
||||
|
||||
// Records `inst` as a value held by this entry: appends it to `values` and
// grows `total_uses` by the instruction's use count.
void HostLocInfo::AddValue(IR::Inst* inst) {
|
||||
values.push_back(inst);
|
||||
total_uses += inst->UseCount();
|
||||
}
|
||||
|
||||
void HostLocInfo::AddArgReference() {
|
||||
current_references++;
|
||||
ASSERT(accumulated_uses + current_references <= total_uses);
|
||||
|
@ -84,6 +111,7 @@ void HostLocInfo::EndOfAllocScope() {
|
|||
values.clear();
|
||||
accumulated_uses = 0;
|
||||
total_uses = 0;
|
||||
max_bit_width = 0;
|
||||
}
|
||||
|
||||
ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
|
||||
|
@ -92,6 +120,20 @@ void HostLocInfo::EndOfAllocScope() {
|
|||
is_scratch = false;
|
||||
}
|
||||
|
||||
// Reports whether `inst` is one of the instructions stored in this entry's
// `values` list. Comparison is by pointer identity.
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
    for (const IR::Inst* held : values) {
        if (held == inst) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
// Returns `max_bit_width`, which AddValue() raises to the widest bit width
// among the values it has been given.
size_t HostLocInfo::GetMaxBitWidth() const {
|
||||
return max_bit_width;
|
||||
}
|
||||
|
||||
// Records `inst` as a value held by this entry.
//
// Besides appending to `values` and adding the instruction's use count to
// `total_uses`, this widens `max_bit_width` when the instruction's type needs
// more bits than anything recorded so far.
void HostLocInfo::AddValue(IR::Inst* inst) {
    values.push_back(inst);
    total_uses += inst->UseCount();

    const size_t inst_width = GetBitWidth(inst->GetType());
    if (inst_width > max_bit_width) {
        max_bit_width = inst_width;
    }
}
|
||||
|
||||
IR::Type Argument::GetType() const {
|
||||
return value.GetType();
|
||||
}
|
||||
|
@ -439,15 +481,16 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
|
|||
|
||||
// Moves whatever `from` currently holds into `to` and transfers the
// bookkeeping entry along with it.
//
// Preconditions (asserted): `to` is empty, `from` is not locked, and the
// widest value recorded for `from` fits into `to`.
// Does nothing if `from` holds no values.
void RegAlloc::Move(HostLoc to, HostLoc from) {
|
||||
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
|
||||
// The destination must be wide enough for the widest value stored in `from`.
ASSERT(LocInfo(from).GetMaxBitWidth() <= HostLocBitWidth(to));
|
||||
|
||||
if (LocInfo(from).IsEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// EmitMove() reads LocInfo(from).GetMaxBitWidth() to choose the move width,
// so it has to run before LocInfo(from) is reset below.
EmitMove(to, from);
|
||||
|
||||
LocInfo(to) = LocInfo(from);
|
||||
LocInfo(from) = {};
|
||||
|
||||
// NOTE(review): at this point LocInfo(from) has already been reset, so this
// second EmitMove() call would see a max bit width of 0 — confirm whether
// this line is a leftover from before the call was moved up.
EmitMove(to, from);
|
||||
}
|
||||
|
||||
void RegAlloc::CopyToScratch(HostLoc to, HostLoc from) {
|
||||
|
@ -458,6 +501,8 @@ void RegAlloc::CopyToScratch(HostLoc to, HostLoc from) {
|
|||
|
||||
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
|
||||
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
|
||||
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
|
||||
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
|
||||
|
||||
if (LocInfo(a).IsEmpty()) {
|
||||
Move(a, b);
|
||||
|
@ -469,9 +514,9 @@ void RegAlloc::Exchange(HostLoc a, HostLoc b) {
|
|||
return;
|
||||
}
|
||||
|
||||
std::swap(LocInfo(a), LocInfo(b));
|
||||
|
||||
EmitExchange(a, b);
|
||||
|
||||
std::swap(LocInfo(a), LocInfo(b));
|
||||
}
|
||||
|
||||
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
|
||||
|
@ -511,22 +556,81 @@ const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
|
|||
}
|
||||
|
||||
void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
|
||||
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
|
||||
|
||||
if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
|
||||
code->movaps(HostLocToXmm(to), HostLocToXmm(from));
|
||||
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
|
||||
code->mov(HostLocToReg64(to), HostLocToReg64(from));
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code->mov(HostLocToReg64(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
|
||||
code->movq(HostLocToXmm(to), HostLocToReg64(from));
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code->movq(HostLocToXmm(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code->movd(HostLocToXmm(to), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else if (HostLocIsGPR(to) && HostLocIsXMM(from)) {
|
||||
code->movq(HostLocToReg64(to), HostLocToXmm(from));
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code->movq(HostLocToReg64(to), HostLocToXmm(from));
|
||||
} else {
|
||||
code->movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from));
|
||||
}
|
||||
} else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
|
||||
code->movsd(HostLocToXmm(to), spill_to_addr(from));
|
||||
Xbyak::Address spill_addr = spill_to_addr(from);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
code->movaps(HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
case 64:
|
||||
code->movsd(HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
case 32:
|
||||
case 16:
|
||||
case 8:
|
||||
code->movss(HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
|
||||
code->movsd(spill_to_addr(to), HostLocToXmm(from));
|
||||
Xbyak::Address spill_addr = spill_to_addr(to);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
code->movaps(spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
case 64:
|
||||
code->movsd(spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
case 32:
|
||||
case 16:
|
||||
case 8:
|
||||
code->movss(spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
|
||||
code->mov(HostLocToReg64(to), spill_to_addr(from));
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code->mov(HostLocToReg64(to), spill_to_addr(from));
|
||||
} else {
|
||||
code->mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
|
||||
code->mov(spill_to_addr(to), HostLocToReg64(from));
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code->mov(spill_to_addr(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code->mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else {
|
||||
ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
|
||||
}
|
||||
|
|
|
@ -32,24 +32,29 @@ public:
|
|||
bool IsEmpty() const;
|
||||
bool IsLastUse() const;
|
||||
|
||||
bool ContainsValue(const IR::Inst* inst) const;
|
||||
|
||||
void ReadLock();
|
||||
void WriteLock();
|
||||
|
||||
void AddValue(IR::Inst* inst);
|
||||
|
||||
void AddArgReference();
|
||||
void EndOfAllocScope();
|
||||
|
||||
bool ContainsValue(const IR::Inst* inst) const;
|
||||
size_t GetMaxBitWidth() const;
|
||||
|
||||
void AddValue(IR::Inst* inst);
|
||||
|
||||
private:
|
||||
std::vector<IR::Inst*> values;
|
||||
// Current instruction state
|
||||
bool is_being_used = false;
|
||||
bool is_scratch = false;
|
||||
|
||||
// Block state
|
||||
size_t current_references = 0;
|
||||
size_t accumulated_uses = 0;
|
||||
size_t total_uses = 0;
|
||||
|
||||
// Value state
|
||||
std::vector<IR::Inst*> values;
|
||||
size_t max_bit_width = 0;
|
||||
};
|
||||
|
||||
struct Argument {
|
||||
|
|
|
@ -360,13 +360,17 @@ void Inst::SetArg(size_t index, Value value) {
|
|||
}
|
||||
|
||||
// Invalidates this instruction: runs ClearArgs() and then sets its opcode to
// Opcode::Void.
void Inst::Invalidate() {
|
||||
ClearArgs();
|
||||
op = Opcode::Void;
|
||||
}
|
||||
|
||||
// Resets every argument slot of this instruction to a default-constructed
// value. For each argument that is not an immediate, UndoUse() is called on
// it before the slot is reset.
void Inst::ClearArgs() {
|
||||
for (auto& value : args) {
|
||||
// Immediates are skipped here; only non-immediate arguments go through UndoUse().
if (!value.IsImmediate()) {
|
||||
UndoUse(value);
|
||||
}
|
||||
value = {};
|
||||
}
|
||||
// NOTE(review): Invalidate() also assigns Opcode::Void after calling this
// function — confirm whether this assignment is still meant to live here.
op = Opcode::Void;
|
||||
}
|
||||
|
||||
void Inst::ReplaceUsesWith(Value replacement) {
|
||||
|
|
|
@ -105,6 +105,7 @@ public:
|
|||
void SetArg(size_t index, Value value);
|
||||
|
||||
void Invalidate();
|
||||
void ClearArgs();
|
||||
|
||||
void ReplaceUsesWith(Value replacement);
|
||||
|
||||
|
|
Loading…
Reference in a new issue