emit_x64_packed: Use const on locals where applicable

Normalizes the use of const across the affected source files.
Lioncash 2019-05-02 18:59:58 -04:00 committed by MerryMage
parent 9b783a5527
commit 7316fa47b3
2 changed files with 91 additions and 91 deletions


@@ -20,14 +20,14 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     code.paddb(xmm_a, xmm_b);

     if (ge_inst) {
-        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
-        Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

         code.pcmpeqb(ones, ones);
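
The GE pseudo-operation here records per-lane unsigned carry-out of the byte add; pcmpeqb(ones, ones) merely materializes an all-ones mask used to build it. A minimal scalar sketch of the predicate being computed, not the emitted SSE sequence, with the carry rule taken from ARM UADD8 semantics:

#include <cstdint>
#include <cstdio>

// Scalar model of the GE computation for a packed unsigned byte add:
// the GE bit for a lane is set iff the 8-bit addition carried out,
// i.e. the truncated sum wrapped below one of the operands.
uint8_t ge_after_add_u8(uint8_t a, uint8_t b) {
    const uint8_t sum = static_cast<uint8_t>(a + b);  // wraps mod 256, like paddb
    return sum < a ? 0xFF : 0x00;                     // all-ones lane mask, like pcmpeq results
}

int main() {
    std::printf("%02x\n", ge_after_add_u8(200, 100));  // carries: ge = ff
    std::printf("%02x\n", ge_after_add_u8(10, 20));    // no carry: ge = 00
}
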
@@ -47,12 +47,12 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     if (ge_inst) {
-        Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

         code.pxor(xmm_ge, xmm_ge);
         code.movdqa(saturated_sum, xmm_a);
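
For the signed variant the GE bit is the sign of the true (non-truncated) sum, which is why a saturated copy of the sum is built: saturation preserves the sign. A scalar sketch, assuming the ARM SADD8 rule GE = (sum >= 0):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Scalar model of the signed-byte GE computation. The true sum lies in
// [-256, 254]; clamping it to [-128, 127] (what paddsb produces) never
// changes its sign, so testing the saturated sum against zero is enough.
uint8_t ge_after_add_s8(int8_t a, int8_t b) {
    const int sum = a + b;                       // true sum, no truncation
    const int sat = std::clamp(sum, -128, 127);  // saturated sum
    return sat >= 0 ? 0xFF : 0x00;               // identical to (sum >= 0)
}

int main() {
    std::printf("%02x\n", ge_after_add_s8(100, 100));  // saturates to 127, ge = ff
    std::printf("%02x\n", ge_after_add_s8(-100, 50));  // sum -50, ge = 00
}
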
@@ -74,15 +74,15 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     code.paddw(xmm_a, xmm_b);

     if (ge_inst) {
         if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
-            Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
-            Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

             code.pcmpeqb(ones, ones);
@@ -94,8 +94,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
             ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
             ctx.EraseInstruction(ge_inst);
         } else {
-            Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
-            Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();

            // !(b <= a+b) == b > a+b
            code.movdqa(tmp_a, xmm_a);
@@ -116,12 +116,12 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     if (ge_inst) {
-        Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

         code.pxor(xmm_ge, xmm_ge);
         code.movdqa(saturated_sum, xmm_a);
@@ -143,11 +143,11 @@ void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     if (ge_inst) {
-        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

         code.movdqa(xmm_ge, xmm_a);
         code.pmaxub(xmm_ge, xmm_b);
@@ -166,12 +166,12 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     if (ge_inst) {
-        Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

         code.pxor(xmm_ge, xmm_ge);
         code.movdqa(saturated_sum, xmm_a);
@@ -194,8 +194,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

     if (!ge_inst) {
-        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

         code.psubw(xmm_a, xmm_b);
@@ -204,9 +204,9 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
     }

     if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
-        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
-        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

         code.movdqa(xmm_ge, xmm_a);
         code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
@@ -220,10 +220,10 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
         return;
     }

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
-    Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
-    Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

    // (a >= b) == !(b > a)
    code.pcmpeqb(ones, ones);
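
Here GE for the unsigned subtract means "no borrow", computed as (a >= b) == !(b > a): a signed compare on biased values, inverted through the all-ones register allocated above. A scalar model of that computation:

#include <cstdint>
#include <cstdio>

// Scalar model of the pre-SSE4.1 GE computation for a packed u16 subtract.
// With only a signed greater-than available, compute b > a on 0x8000-biased
// values, then invert with an all-ones mask: (a >= b) == !(b > a).
uint16_t ge_before_sub_u16(uint16_t a, uint16_t b) {
    const int16_t biased_a = static_cast<int16_t>(a ^ 0x8000);
    const int16_t biased_b = static_cast<int16_t>(b ^ 0x8000);
    const uint16_t b_gt_a = (biased_b > biased_a) ? 0xFFFF : 0x0000;
    return b_gt_a ^ 0xFFFF;  // invert: lane mask for a >= b
}

int main() {
    std::printf("%04x\n", ge_before_sub_u16(5, 3));  // ffff: no borrow
    std::printf("%04x\n", ge_before_sub_u16(3, 5));  // 0000: borrow
}
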
@@ -244,12 +244,12 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     if (ge_inst) {
-        Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

         code.pxor(xmm_ge, xmm_ge);
         code.movdqa(saturated_diff, xmm_a);
@@ -271,9 +271,9 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

     if (args[0].IsInXmm() || args[1].IsInXmm()) {
-        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
-        Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
+        const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

         // Since,
         //   pavg(a, b) == (a + b + 1) >> 1
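
pavgb rounds up, while a halving add must truncate. The all-ones register suggests the usual complement trick, ~pavg(~a, ~b) == (a + b) >> 1 (an assumption here; the rest of the sequence is cut off above). A scalar check of that identity:

#include <cstdint>
#include <cstdio>

// Scalar model of the xmm path: averaging the complements and
// complementing the result turns the rounding-up average into the
// truncating one: ~avg_up(~a, ~b) == (a + b) >> 1, per 8-bit lane.
uint8_t halving_add_u8(uint8_t a, uint8_t b) {
    const uint8_t na = static_cast<uint8_t>(~a);
    const uint8_t nb = static_cast<uint8_t>(~b);
    const uint8_t avg = static_cast<uint8_t>((na + nb + 1) >> 1);  // what pavgb computes
    return static_cast<uint8_t>(~avg);
}

int main() {
    std::printf("%u\n", halving_add_u8(3, 4));      // (3+4)>>1 = 3
    std::printf("%u\n", halving_add_u8(255, 255));  // 255, no intermediate overflow
}
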
@@ -288,11 +288,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.DefineValue(inst, xmm_a);
     } else {
-        Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
-        Xbyak::Reg32 and_a_b = reg_a;
-        Xbyak::Reg32 result = reg_a;
+        const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 and_a_b = reg_a;
+        const Xbyak::Reg32 result = reg_a;

         // This relies on the equality x+y == ((x&y) << 1) + (x^y).
         // Note that x^y always contains the LSB of the result.
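
The GPR fallback splits the add into carry bits and sum bits so one 32-bit add can stand in for four independent byte adds. A scalar sketch of the identity as used here; the 0x7F mask keeps shifted bits inside their byte lane:

#include <cstdint>
#include <cstdio>

// x+y == ((x&y) << 1) + (x^y), so (x+y) >> 1 == (x&y) + ((x^y) >> 1).
// Applied to a packed word: a&b holds the per-bit carries, a^b the sum
// bits (including the LSB that the halving discards).
uint32_t packed_halving_add_u8(uint32_t a, uint32_t b) {
    const uint32_t and_a_b = a & b;
    const uint32_t xor_a_b = a ^ b;
    return and_a_b + ((xor_a_b >> 1) & 0x7F7F7F7Fu);  // per-byte (a+b)>>1
}

int main() {
    // bytes {200,100} + {100,100} halve to {150,100}
    std::printf("%08x\n", packed_halving_add_u8(0x0000C864u, 0x00006464u));  // 00009664
}
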
@@ -314,9 +314,9 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

     if (args[0].IsInXmm() || args[1].IsInXmm()) {
-        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
-        Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

         code.movdqa(tmp, xmm_a);
         code.pand(xmm_a, xmm_b);
@@ -326,11 +326,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.DefineValue(inst, xmm_a);
     } else {
-        Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
-        Xbyak::Reg32 and_a_b = reg_a;
-        Xbyak::Reg32 result = reg_a;
+        const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
+        const Xbyak::Reg32 and_a_b = reg_a;
+        const Xbyak::Reg32 result = reg_a;

         // This relies on the equality x+y == ((x&y) << 1) + (x^y).
         // Note that x^y always contains the LSB of the result.
@@ -351,12 +351,12 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
-    Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Reg32 and_a_b = reg_a;
-    Xbyak::Reg32 result = reg_a;
-    Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+    const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 and_a_b = reg_a;
+    const Xbyak::Reg32 result = reg_a;
+    const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();

     // This relies on the equality x+y == ((x&y) << 1) + (x^y).
     // Note that x^y always contains the LSB of the result.
@@ -380,9 +380,9 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
-    Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

     // This relies on the equality x+y == ((x&y) << 1) + (x^y).
     // Note that x^y always contains the LSB of the result.
@@ -401,8 +401,8 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+    const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();

     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
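
This is the subtractive counterpart of the halving-add identity: x - y == (x^y) - (((x^y)&y) << 1), so the halved difference per lane is ((x^y) >> 1) - ((x^y) & y). A scalar check:

#include <cstdint>
#include <cstdio>

// Scalar model of the halving-sub identity, evaluated per 8-bit lane.
// (x^y)&y marks the bit positions that borrow; subtracting them unshifted
// from the halved XOR yields (x-y) >> 1 with the same wrapping as hardware.
uint8_t halving_sub_u8(uint8_t x, uint8_t y) {
    const uint8_t x_xor_y = x ^ y;
    return static_cast<uint8_t>((x_xor_y >> 1) - (x_xor_y & y));
}

int main() {
    std::printf("%u\n", halving_sub_u8(10, 4));  // (10-4)>>1 = 3
    std::printf("%u\n", halving_sub_u8(4, 10));  // (4-10)>>1 = -3 -> 253 (0xfd)
}
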
@@ -431,10 +431,10 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
-    Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+    const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();

     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
@@ -467,8 +467,8 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);

     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
@@ -490,8 +490,8 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);

     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
@@ -514,10 +514,10 @@ void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

-    Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
-    Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+    const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
     Xbyak::Reg32 reg_sum, reg_diff;

     if (is_signed) {
@@ -546,8 +546,8 @@ void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool
     if (ge_inst) {
         // The reg_b registers are no longer required.
-        Xbyak::Reg32 ge_sum = reg_b_hi;
-        Xbyak::Reg32 ge_diff = reg_b_lo;
+        const Xbyak::Reg32 ge_sum = reg_b_hi;
+        const Xbyak::Reg32 ge_diff = reg_b_lo;

         code.mov(ge_sum, reg_sum);
         code.mov(ge_diff, reg_diff);
@@ -618,8 +618,8 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
 static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

     (code.*fn)(xmm_a, xmm_b);
@@ -665,12 +665,12 @@ void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();
+    const size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();

     if (num_args_in_xmm >= 2) {
-        Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
-        Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);
+        const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
+        const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);

         code.pand(from, ge);
         code.pandn(ge, to);
@@ -678,9 +678,9 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.DefineValue(inst, from);
     } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) {
-        Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-        Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
-        Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
+        const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+        const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();

         code.and_(from, ge);
         code.andn(to, ge, to);
@@ -688,9 +688,9 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.DefineValue(inst, from);
     } else {
-        Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
+        const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();

         code.and_(from, ge);
         code.not_(ge);
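
All three PackedSelect paths implement the same bitwise mux, result = (ge & from) | (~ge & to); pand/pandn, BMI1's andn, and the and_/not_ fallback differ only in how ~ge & to is formed. A scalar model:

#include <cstdint>
#include <cstdio>

// Bitwise mux: each GE bit picks a bit from `from` where set and from
// `to` where clear. pandn(ge, to) and andn(to, ge, to) both compute the
// ~ge & to half directly.
uint32_t packed_select(uint32_t ge, uint32_t to, uint32_t from) {
    return (ge & from) | (~ge & to);
}

int main() {
    // ge masks whole bytes: take byte 0 from `from`, the rest from `to`.
    std::printf("%08x\n", packed_select(0x000000FFu, 0xAABBCCDDu, 0x11223344u));  // aabbcc44
}
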


@@ -106,7 +106,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
     }

     if (overflow_inst) {
-        Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
+        const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();

         code.setb(overflow.cvt8());
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
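
setb copies the carry flag into the overflow register after the saturating operation. Assuming the unsigned-add case of EmitUnsignedSaturatedOp, a scalar sketch of what the captured flag and the clamped result mean:

#include <cstdint>
#include <cstdio>

// Scalar model: for an unsigned saturating add, the carry flag that
// setb materializes is exactly "the true sum did not fit".
uint32_t saturated_add_u32(uint32_t a, uint32_t b, bool& overflow) {
    const uint32_t sum = a + b;
    overflow = sum < a;                  // carry out, what setb would capture
    return overflow ? UINT32_MAX : sum;  // clamp on overflow
}

int main() {
    bool q = false;
    const uint32_t r = saturated_add_u32(0xFFFFFFF0u, 0x100u, q);
    std::printf("%08x %d\n", r, q);  // ffffffff 1
}
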