diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 618ff076..ac75a46b 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -222,7 +222,7 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand);
 }
 
-void EmitX64::EmitVectorLowerBroadcast8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -241,7 +241,7 @@ void EmitX64::EmitVectorLowerBroadcast8(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
-void EmitX64::EmitVectorLowerBroadcast16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -251,7 +251,7 @@ void EmitX64::EmitVectorLowerBroadcast16(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
-void EmitX64::EmitVectorLowerBroadcast32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -311,26 +311,10 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
-void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::por);
-}
-
 void EmitX64::EmitVectorEor(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pxor);
 }
 
-void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm();
-
-    code.pcmpeqw(xmm_b, xmm_b);
-    code.pxor(xmm_a, xmm_b);
-
-    ctx.reg_alloc.DefineValue(inst, xmm_a);
-}
-
 void EmitX64::EmitVectorEqual8(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpeqb);
 }
@@ -430,7 +414,23 @@ void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorInterleaveLower(code, ctx, inst, 64);
 }
 
-void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm();
+
+    code.pcmpeqw(xmm_b, xmm_b);
+    code.pxor(xmm_a, xmm_b);
+
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
+}
+
+void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::por);
+}
+
+void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -448,7 +448,7 @@ void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
-void EmitX64::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -471,7 +471,7 @@ void EmitX64::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
-void EmitX64::EmitVectorLowerPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
diff --git a/src/frontend/A64/translate/impl/simd_copy.cpp b/src/frontend/A64/translate/impl/simd_copy.cpp
index 6a3359f3..c2576fee 100644
--- a/src/frontend/A64/translate/impl/simd_copy.cpp
+++ b/src/frontend/A64/translate/impl/simd_copy.cpp
@@ -21,11 +21,11 @@ bool TranslatorVisitor::DUP_gen(bool Q, Imm<5> imm5, Reg Rn, Vec Vd) {
     const IR::U128 result = [&]{
         switch (esize) {
         case 8:
-            return Q ? ir.VectorBroadcast8(element) : ir.VectorLowerBroadcast8(element);
+            return Q ? ir.VectorBroadcast8(element) : ir.VectorBroadcastLower8(element);
         case 16:
-            return Q ? ir.VectorBroadcast16(element) : ir.VectorLowerBroadcast16(element);
+            return Q ? ir.VectorBroadcast16(element) : ir.VectorBroadcastLower16(element);
         case 32:
-            return Q ? ir.VectorBroadcast32(element) : ir.VectorLowerBroadcast32(element);
+            return Q ? ir.VectorBroadcast32(element) : ir.VectorBroadcastLower32(element);
         default:
             return ir.VectorBroadcast64(element);
         }
diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp
index 947e3c92..62223bb9 100644
--- a/src/frontend/A64/translate/impl/simd_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_three_same.cpp
@@ -45,11 +45,11 @@ bool TranslatorVisitor::ADDP_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
     const IR::U128 result = [&]{
         switch (esize) {
         case 8:
-            return Q ? ir.VectorPairedAdd8(operand1, operand2) : ir.VectorLowerPairedAdd8(operand1, operand2);
+            return Q ? ir.VectorPairedAdd8(operand1, operand2) : ir.VectorPairedAddLower8(operand1, operand2);
         case 16:
-            return Q ? ir.VectorPairedAdd16(operand1, operand2) : ir.VectorLowerPairedAdd16(operand1, operand2);
+            return Q ? ir.VectorPairedAdd16(operand1, operand2) : ir.VectorPairedAddLower16(operand1, operand2);
         case 32:
-            return Q ? ir.VectorPairedAdd32(operand1, operand2) : ir.VectorLowerPairedAdd32(operand1, operand2);
+            return Q ? ir.VectorPairedAdd32(operand1, operand2) : ir.VectorPairedAddLower32(operand1, operand2);
         default:
             return ir.VectorPairedAdd64(operand1, operand2);
         }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 2697178d..b3151575 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -805,28 +805,16 @@ U128 IREmitter::VectorAnd(const U128& a, const U128& b) {
     return Inst(Opcode::VectorAnd, a, b);
 }
 
-U128 IREmitter::VectorOr(const U128& a, const U128& b) {
-    return Inst(Opcode::VectorOr, a, b);
+U128 IREmitter::VectorBroadcastLower8(const U8& a) {
+    return Inst(Opcode::VectorBroadcastLower8, a);
 }
 
-U128 IREmitter::VectorEor(const U128& a, const U128& b) {
-    return Inst(Opcode::VectorEor, a, b);
+U128 IREmitter::VectorBroadcastLower16(const U16& a) {
+    return Inst(Opcode::VectorBroadcastLower16, a);
 }
 
-U128 IREmitter::VectorNot(const U128& a) {
-    return Inst(Opcode::VectorNot, a);
-}
-
-U128 IREmitter::VectorLowerBroadcast8(const U8& a) {
-    return Inst(Opcode::VectorLowerBroadcast8, a);
-}
-
-U128 IREmitter::VectorLowerBroadcast16(const U16& a) {
-    return Inst(Opcode::VectorLowerBroadcast16, a);
-}
-
-U128 IREmitter::VectorLowerBroadcast32(const U32& a) {
-    return Inst(Opcode::VectorLowerBroadcast32, a);
+U128 IREmitter::VectorBroadcastLower32(const U32& a) {
+    return Inst(Opcode::VectorBroadcastLower32, a);
 }
 
 U128 IREmitter::VectorBroadcast8(const U8& a) {
@@ -845,6 +833,10 @@ U128 IREmitter::VectorBroadcast64(const U64& a) {
     return Inst(Opcode::VectorBroadcast64, a);
 }
 
+U128 IREmitter::VectorEor(const U128& a, const U128& b) {
+    return Inst(Opcode::VectorEor, a, b);
+}
+
 U128 IREmitter::VectorEqual8(const U128& a, const U128& b) {
     return Inst(Opcode::VectorEqual8, a, b);
 }
@@ -881,16 +873,24 @@ U128 IREmitter::VectorInterleaveLower64(const U128& a, const U128& b) {
     return Inst(Opcode::VectorInterleaveLower64, a, b);
 }
 
-U128 IREmitter::VectorLowerPairedAdd8(const U128& a, const U128& b) {
-    return Inst(Opcode::VectorLowerPairedAdd8, a, b);
+U128 IREmitter::VectorNot(const U128& a) {
+    return Inst(Opcode::VectorNot, a);
 }
 
-U128 IREmitter::VectorLowerPairedAdd16(const U128& a, const U128& b) {
-    return Inst(Opcode::VectorLowerPairedAdd16, a, b);
+U128 IREmitter::VectorOr(const U128& a, const U128& b) {
+    return Inst(Opcode::VectorOr, a, b);
 }
 
-U128 IREmitter::VectorLowerPairedAdd32(const U128& a, const U128& b) {
-    return Inst(Opcode::VectorLowerPairedAdd32, a, b);
+U128 IREmitter::VectorPairedAddLower8(const U128& a, const U128& b) {
+    return Inst(Opcode::VectorPairedAddLower8, a, b);
+}
+
+U128 IREmitter::VectorPairedAddLower16(const U128& a, const U128& b) {
+    return Inst(Opcode::VectorPairedAddLower16, a, b);
+}
+
+U128 IREmitter::VectorPairedAddLower32(const U128& a, const U128& b) {
+    return Inst(Opcode::VectorPairedAddLower32, a, b);
 }
 
 U128 IREmitter::VectorPairedAdd8(const U128& a, const U128& b) {
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index f9812a21..c13d65e6 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -207,21 +207,19 @@ public:
     UAny VectorGetElement(size_t esize, const U128& a, size_t index);
     U128 VectorSetElement(size_t esize, const U128& a, size_t index, const UAny& elem);
 
-    U128 VectorAdd8(const U128& a, const U128& b);
     U128 VectorAdd16(const U128& a, const U128& b);
     U128 VectorAdd32(const U128& a, const U128& b);
     U128 VectorAdd64(const U128& a, const U128& b);
+    U128 VectorAdd8(const U128& a, const U128& b);
     U128 VectorAnd(const U128& a, const U128& b);
-    U128 VectorOr(const U128& a, const U128& b);
-    U128 VectorEor(const U128& a, const U128& b);
-    U128 VectorNot(const U128& a);
-    U128 VectorLowerBroadcast8(const U8& a);
-    U128 VectorLowerBroadcast16(const U16& a);
-    U128 VectorLowerBroadcast32(const U32& a);
     U128 VectorBroadcast8(const U8& a);
     U128 VectorBroadcast16(const U16& a);
     U128 VectorBroadcast32(const U32& a);
     U128 VectorBroadcast64(const U64& a);
+    U128 VectorBroadcastLower8(const U8& a);
+    U128 VectorBroadcastLower16(const U16& a);
+    U128 VectorBroadcastLower32(const U32& a);
+    U128 VectorEor(const U128& a, const U128& b);
     U128 VectorEqual8(const U128& a, const U128& b);
     U128 VectorEqual16(const U128& a, const U128& b);
     U128 VectorEqual32(const U128& a, const U128& b);
@@ -231,13 +229,15 @@ public:
     U128 VectorInterleaveLower16(const U128& a, const U128& b);
     U128 VectorInterleaveLower32(const U128& a, const U128& b);
     U128 VectorInterleaveLower64(const U128& a, const U128& b);
-    U128 VectorLowerPairedAdd8(const U128& a, const U128& b);
-    U128 VectorLowerPairedAdd16(const U128& a, const U128& b);
-    U128 VectorLowerPairedAdd32(const U128& a, const U128& b);
+    U128 VectorNot(const U128& a);
+    U128 VectorOr(const U128& a, const U128& b);
     U128 VectorPairedAdd8(const U128& a, const U128& b);
     U128 VectorPairedAdd16(const U128& a, const U128& b);
     U128 VectorPairedAdd32(const U128& a, const U128& b);
     U128 VectorPairedAdd64(const U128& a, const U128& b);
+    U128 VectorPairedAddLower8(const U128& a, const U128& b);
+    U128 VectorPairedAddLower16(const U128& a, const U128& b);
+    U128 VectorPairedAddLower32(const U128& a, const U128& b);
     U128 VectorZeroUpper(const U128& a);
 
     U32U64 FPAbs(const U32U64& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 772e776c..9e03e4ee 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -203,16 +203,14 @@ OPCODE(VectorAdd16,             T::U128, T::U128, T::U128 )
 OPCODE(VectorAdd32,             T::U128, T::U128, T::U128 )
 OPCODE(VectorAdd64,             T::U128, T::U128, T::U128 )
 OPCODE(VectorAnd,               T::U128, T::U128, T::U128 )
-OPCODE(VectorOr,                T::U128, T::U128, T::U128 )
-OPCODE(VectorEor,               T::U128, T::U128, T::U128 )
-OPCODE(VectorNot,               T::U128, T::U128          )
-OPCODE(VectorLowerBroadcast8,   T::U128, T::U8            )
-OPCODE(VectorLowerBroadcast16,  T::U128, T::U16           )
-OPCODE(VectorLowerBroadcast32,  T::U128, T::U32           )
+OPCODE(VectorBroadcastLower8,   T::U128, T::U8            )
+OPCODE(VectorBroadcastLower16,  T::U128, T::U16           )
+OPCODE(VectorBroadcastLower32,  T::U128, T::U32           )
 OPCODE(VectorBroadcast8,        T::U128, T::U8            )
 OPCODE(VectorBroadcast16,       T::U128, T::U16           )
 OPCODE(VectorBroadcast32,       T::U128, T::U32           )
 OPCODE(VectorBroadcast64,       T::U128, T::U64           )
+OPCODE(VectorEor,               T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual8,            T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual16,           T::U128, T::U128, T::U128 )
 OPCODE(VectorEqual32,           T::U128, T::U128, T::U128 )
@@ -222,9 +220,11 @@ OPCODE(VectorInterleaveLower8,  T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower64, T::U128, T::U128, T::U128 )
-OPCODE(VectorLowerPairedAdd8,   T::U128, T::U128, T::U128 )
-OPCODE(VectorLowerPairedAdd16,  T::U128, T::U128, T::U128 )
-OPCODE(VectorLowerPairedAdd32,  T::U128, T::U128, T::U128 )
+OPCODE(VectorNot,               T::U128, T::U128          )
+OPCODE(VectorOr,                T::U128, T::U128, T::U128 )
+OPCODE(VectorPairedAddLower8,   T::U128, T::U128, T::U128 )
+OPCODE(VectorPairedAddLower16,  T::U128, T::U128, T::U128 )
+OPCODE(VectorPairedAddLower32,  T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAdd8,        T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAdd16,       T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAdd32,       T::U128, T::U128, T::U128 )
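Reviewer note, not part of the patch: the change is a mechanical rename (VectorLowerBroadcast* -> VectorBroadcastLower*, VectorLowerPairedAdd* -> VectorPairedAddLower*) plus reordering so that opcodes.inc, the IREmitter declarations, and the EmitX64 handlers stay alphabetically sorted; no emitted code changes. For anyone new to the "Lower" naming, the sketch below gives plain-C++ reference semantics for the 8-bit cases. It is inferred from the Q == 0 (64-bit vector) paths selected in DUP_gen and ADDP_vec above, together with the AArch64 rule that a 64-bit vector write zeroes the upper half of the register; Vec128 and the helper names are illustrative, not dynarmic API.

#include <array>
#include <cstddef>
#include <cstdint>

using Vec128 = std::array<std::uint8_t, 16>;  // one 128-bit vector, byte lanes

// VectorBroadcastLower8: replicate one byte across the low 8 lanes;
// lanes 8..15 (the upper 64 bits) stay zero, matching a Q == 0 DUP.
Vec128 BroadcastLower8(std::uint8_t value) {
    Vec128 result{};  // zero-initialised, so the upper half remains 0
    for (std::size_t i = 0; i < 8; ++i) {
        result[i] = value;
    }
    return result;
}

// VectorPairedAddLower8: sum adjacent byte pairs of the low halves of a and b,
// packing a's four sums then b's four sums into the low 64 bits (Q == 0 ADDP);
// sums wrap modulo 256, and the upper half of the result is again zero.
Vec128 PairedAddLower8(const Vec128& a, const Vec128& b) {
    Vec128 result{};
    for (std::size_t i = 0; i < 4; ++i) {
        result[i]     = static_cast<std::uint8_t>(a[2 * i] + a[2 * i + 1]);
        result[4 + i] = static_cast<std::uint8_t>(b[2 * i] + b[2 * i + 1]);
    }
    return result;
}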
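One more detail worth flagging while EmitVectorNot is being moved: it relies on the usual SSE2 idiom for bitwise NOT, since x64 has no vector-NOT instruction. pcmpeqw of a register with itself sets every lane to all-ones (each lane trivially equals itself, whatever the register holds), and pxor against that mask then flips every bit of the operand. A standalone sketch with SSE2 intrinsics; the wrapper function is hypothetical, only the two-instruction pattern comes from the patch:

#include <emmintrin.h>  // SSE2 intrinsics

// Bitwise NOT of a 128-bit value: x ^ 0xFFFF...FF.
__m128i VectorNot128(__m128i x) {
    const __m128i all_ones = _mm_cmpeq_epi16(x, x);  // pcmpeqw a,a -> all-ones mask
    return _mm_xor_si128(x, all_ones);               // pxor with the mask flips all bits
}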