From fff8e019dc479d2a7bf82e8350784b314e233dd8 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Thu, 18 Jan 2018 13:00:07 +0000
Subject: [PATCH] reg_alloc: Consider bitwidth of data and registers when
 emitting instructions

---
 src/backend_x64/emit_x64.cpp         |  92 ++++++++---------
 src/backend_x64/hostloc.h            |  12 +++
 src/backend_x64/reg_alloc.cpp        | 144 +++++++++++++++++++++++----
 src/backend_x64/reg_alloc.h          |  17 ++--
 src/frontend/ir/microinstruction.cpp |   6 +-
 src/frontend/ir/microinstruction.h   |   1 +
 6 files changed, 193 insertions(+), 79 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index 0dadd6c5..b20146f1 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -44,7 +44,7 @@ EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
 
 void EmitContext::EraseInstruction(IR::Inst* inst) {
     block.Instructions().erase(inst);
-    inst->Invalidate();
+    inst->ClearArgs();
 }
 
 template <typename JST>
@@ -187,10 +187,10 @@ void EmitX64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) {
     code->shr(result, 32);
 
     if (carry_inst) {
-        ctx.EraseInstruction(carry_inst);
        Xbyak::Reg64 carry = ctx.reg_alloc.ScratchGpr();
        code->setc(carry.cvt8());
        ctx.reg_alloc.DefineValue(carry_inst, carry);
+       ctx.EraseInstruction(carry_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -370,8 +370,6 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
             ctx.reg_alloc.DefineValue(inst, result);
         }
     } else {
-        ctx.EraseInstruction(carry_inst);
-
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -392,8 +390,9 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
                 code->and_(carry, 1);
             }
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         } else {
             ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -426,8 +425,9 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
 
             code->outLocalLabel();
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         }
     }
 }
@@ -503,8 +503,6 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
             ctx.reg_alloc.DefineValue(inst, result);
         }
     } else {
-        ctx.EraseInstruction(carry_inst);
-
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -524,8 +522,9 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
                 code->xor_(carry, carry);
             }
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         } else {
             ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -560,8 +559,9 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
 
             code->outLocalLabel();
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         }
     }
 }
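Every hunk above makes the same pair of changes: EraseInstruction() now calls ClearArgs() rather than Invalidate(), and the call moves from before the pseudo-operation's code to after the corresponding DefineValue(). The reason becomes visible later in the patch: DefineValue() ultimately lands in HostLocInfo::AddValue(), which now inspects the instruction's IR type to track bit widths, so the instruction must still carry a valid opcode at that point. A minimal sketch of the dependency, condensed from the pieces this patch adds (not a verbatim copy of the surrounding sources):

```cpp
// Condensed sketch: DefineValue(inst, reg) eventually reaches AddValue().
void HostLocInfo::AddValue(IR::Inst* inst) {
    values.push_back(inst);
    total_uses += inst->UseCount();
    // GetType() is derived from the opcode. The old Invalidate() set the
    // opcode to Void before this point, which would make the width lookup
    // assert; ClearArgs() only drops the arguments, so the type survives.
    max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
}
```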
@@ -635,8 +635,6 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst)
             ctx.reg_alloc.DefineValue(inst, result);
         }
     } else {
-        ctx.EraseInstruction(carry_inst);
-
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -653,8 +651,9 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst)
                 code->setc(carry);
             }
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         } else {
             ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -683,8 +682,9 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst)
 
             code->outLocalLabel();
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         }
     }
 }
@@ -748,8 +748,6 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
             ctx.reg_alloc.DefineValue(inst, result);
         }
     } else {
-        ctx.EraseInstruction(carry_inst);
-
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -765,8 +763,9 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
                 code->setc(carry);
             }
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         } else {
             ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
             Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
@@ -795,8 +794,9 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
 
             code->outLocalLabel();
 
-            ctx.reg_alloc.DefineValue(inst, result);
             ctx.reg_alloc.DefineValue(carry_inst, carry);
+            ctx.EraseInstruction(carry_inst);
+            ctx.reg_alloc.DefineValue(inst, result);
         }
     }
 }
@@ -838,11 +838,10 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
     code->rcr(result, 1);
 
     if (carry_inst) {
-        ctx.EraseInstruction(carry_inst);
-
         code->setc(carry);
 
         ctx.reg_alloc.DefineValue(carry_inst, carry);
+        ctx.EraseInstruction(carry_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -912,20 +911,20 @@ static void EmitAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit
     }
 
     if (nzcv_inst) {
-        ctx.EraseInstruction(nzcv_inst);
         code->lahf();
         code->seto(code->al);
         ctx.reg_alloc.DefineValue(nzcv_inst, nzcv);
+        ctx.EraseInstruction(nzcv_inst);
     }
     if (carry_inst) {
-        ctx.EraseInstruction(carry_inst);
         code->setc(carry);
         ctx.reg_alloc.DefineValue(carry_inst, carry);
+        ctx.EraseInstruction(carry_inst);
     }
     if (overflow_inst) {
-        ctx.EraseInstruction(overflow_inst);
         code->seto(overflow);
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -990,21 +989,21 @@ static void EmitSub(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit
     }
 
     if (nzcv_inst) {
-        ctx.EraseInstruction(nzcv_inst);
         code->cmc();
         code->lahf();
         code->seto(code->al);
         ctx.reg_alloc.DefineValue(nzcv_inst, nzcv);
+        ctx.EraseInstruction(nzcv_inst);
     }
     if (carry_inst) {
-        ctx.EraseInstruction(carry_inst);
         code->setnc(carry);
         ctx.reg_alloc.DefineValue(carry_inst, carry);
+        ctx.EraseInstruction(carry_inst);
     }
     if (overflow_inst) {
-        ctx.EraseInstruction(overflow_inst);
         code->seto(overflow);
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
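The lahf/seto pair in EmitAdd and EmitSub packs all four ARM flags into AX in two instructions: lahf copies SF:ZF:0:AF:0:PF:1:CF into AH, and "seto al" records OF in AL (EmitSub additionally executes cmc first, because x86 subtraction produces a borrow where ARM expects an inverted carry). A hedged illustration of the resulting layout; the helper below is not part of the patch, and it assumes the nzcv value really was allocated over AX, as the use of code->al implies:

```cpp
#include <cstdint>

struct NZCV { bool n, z, c, v; };

// Illustrative only: unpack the AX layout produced by "lahf; seto al".
inline NZCV UnpackLahfSeto(std::uint16_t ax) {
    const std::uint8_t ah = static_cast<std::uint8_t>(ax >> 8); // lahf result
    const std::uint8_t al = static_cast<std::uint8_t>(ax);      // seto result
    return NZCV{
        (ah & 0x80) != 0, // SF -> N
        (ah & 0x40) != 0, // ZF -> Z
        (ah & 0x01) != 0, // CF -> C
        (al & 0x01) != 0, // OF -> V
    };
}
```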
@@ -1345,11 +1344,10 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
     code->cmovo(result, overflow);
 
     if (overflow_inst) {
-        ctx.EraseInstruction(overflow_inst);
-
         code->seto(overflow.cvt8());
 
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -1373,11 +1371,10 @@ void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
     code->cmovo(result, overflow);
 
     if (overflow_inst) {
-        ctx.EraseInstruction(overflow_inst);
-
         code->seto(overflow.cvt8());
 
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -1405,11 +1402,10 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     code->cmovbe(result, reg_a);
 
     if (overflow_inst) {
-        ctx.EraseInstruction(overflow_inst);
-
         code->seta(overflow.cvt8());
 
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -1456,11 +1452,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     code->cmovbe(result, reg_a);
 
     if (overflow_inst) {
-        ctx.EraseInstruction(overflow_inst);
-
         code->seta(overflow.cvt8());
 
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.EraseInstruction(overflow_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
@@ -1477,8 +1472,6 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
     code->paddb(xmm_a, xmm_b);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
 
@@ -1490,6 +1483,7 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
         code->pxor(xmm_ge, ones);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.EraseInstruction(ge_inst);
     }
 
     ctx.reg_alloc.DefineValue(inst, xmm_a);
@@ -1504,8 +1498,6 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
@@ -1517,6 +1509,7 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
         code->pxor(xmm_ge, saturated_sum);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.EraseInstruction(ge_inst);
     }
 
     code->paddb(xmm_a, xmm_b);
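For the saturation emitters above, the add/cmovo/seto (or cmp/cmovbe/seta) sequences implement clamp-on-overflow, with the overflow_inst pseudo-operation receiving the sticky Q-flag input. A scalar model of what EmitSignedSaturatedAdd computes, written out for clarity (an illustration, not code from the repository):

```cpp
#include <cstdint>
#include <limits>

// Scalar model: add, clamp to INT32_MIN/INT32_MAX on overflow (cmovo),
// and report whether clamping happened (seto -> overflow_inst).
inline std::int32_t SignedSaturatedAdd(std::int32_t a, std::int32_t b, bool& q) {
    constexpr std::int64_t lo = std::numeric_limits<std::int32_t>::min();
    constexpr std::int64_t hi = std::numeric_limits<std::int32_t>::max();
    const std::int64_t wide = std::int64_t{a} + std::int64_t{b};
    q = wide < lo || wide > hi;                // seto -> overflow_inst
    const std::int64_t clamped = wide < lo ? lo : (wide > hi ? hi : wide);
    return static_cast<std::int32_t>(clamped); // cmovo selects the clamp
}
```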
@@ -1535,8 +1528,6 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
     code->paddw(xmm_a, xmm_b);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
             Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
             Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
@@ -1549,6 +1540,7 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
             code->pxor(xmm_ge, ones);
 
             ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+            ctx.EraseInstruction(ge_inst);
         } else {
             Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
             Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
@@ -1561,6 +1553,7 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
             code->pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
 
             ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
+            ctx.EraseInstruction(ge_inst);
         }
     }
 
@@ -1576,8 +1569,6 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
@@ -1589,6 +1580,7 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
         code->pxor(xmm_ge, saturated_sum);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.EraseInstruction(ge_inst);
     }
 
     code->paddw(xmm_a, xmm_b);
@@ -1605,8 +1597,6 @@ void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
         code->movdqa(xmm_ge, xmm_a);
@@ -1614,6 +1604,7 @@ void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
         code->pcmpeqb(xmm_ge, xmm_a);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.EraseInstruction(ge_inst);
     }
 
     code->psubb(xmm_a, xmm_b);
@@ -1630,8 +1621,6 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
@@ -1643,6 +1632,7 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
         code->pxor(xmm_ge, saturated_sum);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.EraseInstruction(ge_inst);
     }
 
     code->psubb(xmm_a, xmm_b);
@@ -1659,8 +1649,6 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
             Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
@@ -1669,6 +1657,7 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
             code->pcmpeqw(xmm_ge, xmm_a);
 
             ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+            ctx.EraseInstruction(ge_inst);
         } else {
             Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
             Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
@@ -1682,6 +1671,7 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
             code->pxor(xmm_ge, ones);
 
             ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+            ctx.EraseInstruction(ge_inst);
         }
     }
 
@@ -1699,8 +1689,6 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
@@ -1712,6 +1700,7 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
         code->pxor(xmm_ge, saturated_diff);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.EraseInstruction(ge_inst);
     }
 
     code->psubw(xmm_a, xmm_b);
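The "*Signed* comparison!" comment in EmitPackedAddU16's pre-SSE4.1 path flags the classic pitfall: SSE2 has no unsigned word comparison, so an unsigned greater-or-equal has to be synthesised. The standard workaround, which the tmp_a/tmp_b setup elided between the hunks above is consistent with, is to bias both operands by 0x8000 so that a signed pcmpgtw orders them like unsigned values. A one-lane scalar model (assumption: the elided lines apply exactly this bias):

```cpp
#include <cstdint>

// One 16-bit lane: unsigned a > b, evaluated the way pcmpgtw has to do it.
inline bool UnsignedGreaterViaSignedCompare(std::uint16_t a, std::uint16_t b) {
    const std::int16_t sa = static_cast<std::int16_t>(a ^ 0x8000); // bias
    const std::int16_t sb = static_cast<std::int16_t>(b ^ 0x8000); // bias
    return sa > sb; // signed compare on biased values == unsigned compare
}
```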
@@ -2005,8 +1994,6 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
     }
 
     if (ge_inst) {
-        ctx.EraseInstruction(ge_inst);
-
         // The reg_b registers are no longer required.
         Xbyak::Reg32 ge_sum = reg_b_hi;
         Xbyak::Reg32 ge_diff = reg_b_lo;
@@ -2028,6 +2015,7 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
         code->or_(ge_sum, ge_diff);
 
         ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
+        ctx.EraseInstruction(ge_inst);
     }
 
     if (is_halving) {
diff --git a/src/backend_x64/hostloc.h b/src/backend_x64/hostloc.h
index 7be3de3e..172e069a 100644
--- a/src/backend_x64/hostloc.h
+++ b/src/backend_x64/hostloc.h
@@ -48,6 +48,18 @@ inline bool HostLocIsSpill(HostLoc reg) {
     return reg >= HostLoc::FirstSpill;
 }
 
+inline size_t HostLocBitWidth(HostLoc loc) {
+    if (HostLocIsGPR(loc))
+        return 64;
+    if (HostLocIsXMM(loc))
+        return 128;
+    if (HostLocIsSpill(loc))
+        return 64;
+    if (HostLocIsFlag(loc))
+        return 1;
+    UNREACHABLE();
+}
+
 using HostLocList = std::initializer_list<HostLoc>;
 
 // RSP is preserved for function calls
diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp
index e7ddf9e9..026bca92 100644
--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@@ -39,6 +39,42 @@ static bool IsSameHostLocClass(HostLoc a, HostLoc b) {
         || (HostLocIsSpill(a) && HostLocIsSpill(b));
 }
 
+// Minimum number of bits required to represent a type
+static size_t GetBitWidth(IR::Type type) {
+    switch (type) {
+    case IR::Type::A32Reg:
+    case IR::Type::A32ExtReg:
+    case IR::Type::A64Reg:
+    case IR::Type::A64Vec:
+    case IR::Type::CoprocInfo:
+    case IR::Type::Cond:
+    case IR::Type::Void:
+        ASSERT_MSG(false, "Type %zu cannot be represented at runtime", static_cast<size_t>(type));
+        return 0;
+    case IR::Type::Opaque:
+        ASSERT_MSG(false, "Not a concrete type");
+        return 0;
+    case IR::Type::U1:
+        return 8;
+    case IR::Type::U8:
+        return 8;
+    case IR::Type::U16:
+        return 16;
+    case IR::Type::U32:
+        return 32;
+    case IR::Type::U64:
+        return 64;
+    case IR::Type::F32:
+        return 32;
+    case IR::Type::F64:
+        return 64;
+    case IR::Type::F128:
+        return 128;
+    case IR::Type::NZCVFlags:
+        return 32; // TODO: Update to 16 when flags optimization is done
+    }
+}
+
 bool HostLocInfo::IsLocked() const {
     return is_being_used;
 }
@@ -51,10 +87,6 @@ bool HostLocInfo::IsLastUse() const {
     return !is_being_used && current_references == 1 && accumulated_uses + 1 == total_uses;
 }
 
-bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
-    return std::find(values.begin(), values.end(), inst) != values.end();
-}
-
 void HostLocInfo::ReadLock() {
     ASSERT(!is_scratch);
     is_being_used = true;
@@ -66,11 +98,6 @@ void HostLocInfo::WriteLock() {
     is_scratch = true;
 }
 
-void HostLocInfo::AddValue(IR::Inst* inst) {
-    values.push_back(inst);
-    total_uses += inst->UseCount();
-}
-
 void HostLocInfo::AddArgReference() {
     current_references++;
     ASSERT(accumulated_uses + current_references <= total_uses);
@@ -84,6 +111,7 @@ void HostLocInfo::EndOfAllocScope() {
         values.clear();
         accumulated_uses = 0;
         total_uses = 0;
+        max_bit_width = 0;
     }
 
     ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
@@ -92,6 +120,20 @@
     is_being_used = false;
     is_scratch = false;
 }
 
+bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
+    return std::find(values.begin(), values.end(), inst) != values.end();
+}
+
+size_t HostLocInfo::GetMaxBitWidth() const {
+    return max_bit_width;
+}
+
+void HostLocInfo::AddValue(IR::Inst* inst) {
+    values.push_back(inst);
+    total_uses += inst->UseCount();
+    max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
+}
+
 IR::Type Argument::GetType() const {
     return value.GetType();
 }
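Taken together, GetBitWidth() above describes how many bits a runtime value needs, and HostLocBitWidth() in hostloc.h describes how many bits a host location can hold; the asserts this patch adds to Move() and Exchange() below enforce that the first never exceeds the second. The rule restated in one illustrative helper (the function name is ours, not the patch's):

```cpp
// Illustrative restatement of the new Move()/Exchange() asserts.
inline bool ValueFitsIn(HostLoc loc, IR::Type type) {
    // U32 (32) fits a GPR (64); F128 (128) fits an XMM (128) but not a GPR,
    // and, with spill slots reported as 64-bit here, not a spill slot either.
    return GetBitWidth(type) <= HostLocBitWidth(loc);
}
```

Note that U1 reports 8 bits rather than 1: setcc writes a whole byte, so that is the smallest footprint a boolean actually occupies in a register.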
@@ -439,15 +481,16 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
 
 void RegAlloc::Move(HostLoc to, HostLoc from) {
     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
+    ASSERT(LocInfo(from).GetMaxBitWidth() <= HostLocBitWidth(to));
 
     if (LocInfo(from).IsEmpty()) {
         return;
     }
 
+    EmitMove(to, from);
+
     LocInfo(to) = LocInfo(from);
     LocInfo(from) = {};
-
-    EmitMove(to, from);
 }
 
 void RegAlloc::CopyToScratch(HostLoc to, HostLoc from) {
@@ -458,6 +501,8 @@ void RegAlloc::Exchange(HostLoc a, HostLoc b) {
     ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
+    ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
+    ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
 
     if (LocInfo(a).IsEmpty()) {
         Move(a, b);
@@ -469,9 +514,9 @@ void RegAlloc::Exchange(HostLoc a, HostLoc b) {
         return;
     }
 
-    std::swap(LocInfo(a), LocInfo(b));
-
     EmitExchange(a, b);
+
+    std::swap(LocInfo(a), LocInfo(b));
 }
 
 void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
@@ -511,22 +556,81 @@ const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
 }
 
 void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
+    const size_t bit_width = LocInfo(from).GetMaxBitWidth();
+
     if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
         code->movaps(HostLocToXmm(to), HostLocToXmm(from));
     } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
-        code->mov(HostLocToReg64(to), HostLocToReg64(from));
+        ASSERT(bit_width != 128);
+        if (bit_width == 64) {
+            code->mov(HostLocToReg64(to), HostLocToReg64(from));
+        } else {
+            code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
+        }
     } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
-        code->movq(HostLocToXmm(to), HostLocToReg64(from));
+        ASSERT(bit_width != 128);
+        if (bit_width == 64) {
+            code->movq(HostLocToXmm(to), HostLocToReg64(from));
+        } else {
+            code->movd(HostLocToXmm(to), HostLocToReg64(from).cvt32());
+        }
     } else if (HostLocIsGPR(to) && HostLocIsXMM(from)) {
-        code->movq(HostLocToReg64(to), HostLocToXmm(from));
+        ASSERT(bit_width != 128);
+        if (bit_width == 64) {
+            code->movq(HostLocToReg64(to), HostLocToXmm(from));
+        } else {
+            code->movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from));
+        }
     } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
-        code->movsd(HostLocToXmm(to), spill_to_addr(from));
+        Xbyak::Address spill_addr = spill_to_addr(from);
+        ASSERT(spill_addr.getBit() >= bit_width);
+        switch (bit_width) {
+        case 128:
+            code->movaps(HostLocToXmm(to), spill_addr);
+            break;
+        case 64:
+            code->movsd(HostLocToXmm(to), spill_addr);
+            break;
+        case 32:
+        case 16:
+        case 8:
+            code->movss(HostLocToXmm(to), spill_addr);
+            break;
+        default:
+            UNREACHABLE();
+        }
     } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
-        code->movsd(spill_to_addr(to), HostLocToXmm(from));
+        Xbyak::Address spill_addr = spill_to_addr(to);
+        ASSERT(spill_addr.getBit() >= bit_width);
+        switch (bit_width) {
+        case 128:
+            code->movaps(spill_addr, HostLocToXmm(from));
+            break;
+        case 64:
+            code->movsd(spill_addr, HostLocToXmm(from));
+            break;
+        case 32:
+        case 16:
+        case 8:
+            code->movss(spill_addr, HostLocToXmm(from));
+            break;
+        default:
+            UNREACHABLE();
+        }
     } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
-        code->mov(HostLocToReg64(to), spill_to_addr(from));
+        ASSERT(bit_width != 128);
+        if (bit_width == 64) {
+            code->mov(HostLocToReg64(to), spill_to_addr(from));
+        } else {
+            code->mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
+        }
     } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
-        code->mov(spill_to_addr(to), HostLocToReg64(from));
+        ASSERT(bit_width != 128);
+        if (bit_width == 64) {
+            code->mov(spill_to_addr(to), HostLocToReg64(from));
+        } else {
+            code->mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
+        }
     } else {
         ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
     }
diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h
index 0e9fff00..4032a361 100644
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@@ -32,24 +32,29 @@ public:
     bool IsEmpty() const;
     bool IsLastUse() const;
 
-    bool ContainsValue(const IR::Inst* inst) const;
-
     void ReadLock();
     void WriteLock();
-
-    void AddValue(IR::Inst* inst);
-
     void AddArgReference();
     void EndOfAllocScope();
 
+    bool ContainsValue(const IR::Inst* inst) const;
+    size_t GetMaxBitWidth() const;
+
+    void AddValue(IR::Inst* inst);
+
 private:
-    std::vector<IR::Inst*> values;
+    // Current instruction state
     bool is_being_used = false;
     bool is_scratch = false;
 
+    // Block state
     size_t current_references = 0;
     size_t accumulated_uses = 0;
     size_t total_uses = 0;
+
+    // Value state
+    std::vector<IR::Inst*> values;
+    size_t max_bit_width = 0;
 };
 
 struct Argument {
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index bd68255b..be477012 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -360,13 +360,17 @@ void Inst::SetArg(size_t index, Value value) {
 }
 
 void Inst::Invalidate() {
+    ClearArgs();
+    op = Opcode::Void;
+}
+
+void Inst::ClearArgs() {
     for (auto& value : args) {
         if (!value.IsImmediate()) {
             UndoUse(value);
         }
         value = {};
     }
-    op = Opcode::Void;
 }
 
 void Inst::ReplaceUsesWith(Value replacement) {
diff --git a/src/frontend/ir/microinstruction.h b/src/frontend/ir/microinstruction.h
index 04fd7bdf..1a9bbc1b 100644
--- a/src/frontend/ir/microinstruction.h
+++ b/src/frontend/ir/microinstruction.h
@@ -105,6 +105,7 @@ public:
     void SetArg(size_t index, Value value);
 
     void Invalidate();
+    void ClearArgs();
 
     void ReplaceUsesWith(Value replacement);
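The net effect of the EmitMove() rewrite is that moves and spills are sized by the widest value a location holds instead of hardcoding 64-bit transfers. A condensed illustration of the new dispatch for the XMM/spill cases (an illustration of the switch above, not a reusable API):

```cpp
#include <cstddef>
#include <stdexcept>

enum class XmmSpillOp { Movaps, Movsd, Movss };

// Mirrors EmitMove(): pick the narrowest load/store that covers the value.
inline XmmSpillOp SelectXmmSpillOp(std::size_t bit_width) {
    switch (bit_width) {
    case 128: return XmmSpillOp::Movaps; // full 128-bit vector
    case 64:  return XmmSpillOp::Movsd;  // low quadword
    case 32:
    case 16:
    case 8:   return XmmSpillOp::Movss;  // low doubleword covers narrow values
    default:  throw std::logic_error("width not representable in a host loc");
    }
}
```

For the GPR cases the same idea appears as "mov r32, r32" / movd instead of "mov r64, r64" / movq: a 32-bit mov zero-extends to the full register on x86-64, so nothing a narrower value can hold is lost, and the 128-bit widths are asserted away because a general-purpose register can never hold an F128.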