ir: Add opcodes for floating-point GE and GT comparisons
The rest of the comparisons can be implemented in terms of these two.
This commit is contained in:
parent
d86fea0d28
commit
c695da1cf3
4 changed files with 68 additions and 0 deletions
|
@ -209,6 +209,46 @@ void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, a);
|
||||
}
|
||||
|
||||
// Emits a packed single-precision "greater than" comparison of args[0] against args[1].
// The value defined for `inst` is whatever CMPLTPS leaves in the scratch register:
// a per-lane mask that is set where args[1] < args[0], i.e. where args[0] > args[1].
void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The second operand is taken as a scratch register because the compare
    // overwrites its destination, which then doubles as the result register.
    const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(operands[0]);
    const Xbyak::Xmm rhs_result = ctx.reg_alloc.UseScratchXmm(operands[1]);

    // Operands are swapped on purpose: (rhs < lhs) is equivalent to (lhs > rhs).
    code.cmpltps(rhs_result, lhs);

    ctx.reg_alloc.DefineValue(inst, rhs_result);
}
|
||||
|
||||
// Emits a packed double-precision "greater than" comparison of args[0] against args[1].
// The value defined for `inst` is whatever CMPLTPD leaves in the scratch register:
// a per-lane mask that is set where args[1] < args[0], i.e. where args[0] > args[1].
void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The second operand is taken as a scratch register because the compare
    // overwrites its destination, which then doubles as the result register.
    const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(operands[0]);
    const Xbyak::Xmm rhs_result = ctx.reg_alloc.UseScratchXmm(operands[1]);

    // Operands are swapped on purpose: (rhs < lhs) is equivalent to (lhs > rhs).
    code.cmpltpd(rhs_result, lhs);

    ctx.reg_alloc.DefineValue(inst, rhs_result);
}
|
||||
|
||||
// Emits a packed single-precision "greater than or equal" comparison of args[0] against args[1].
// The value defined for `inst` is whatever CMPLEPS leaves in the scratch register:
// a per-lane mask that is set where args[1] <= args[0], i.e. where args[0] >= args[1].
void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The second operand is taken as a scratch register because the compare
    // overwrites its destination, which then doubles as the result register.
    const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(operands[0]);
    const Xbyak::Xmm rhs_result = ctx.reg_alloc.UseScratchXmm(operands[1]);

    // Operands are swapped on purpose: (rhs <= lhs) is equivalent to (lhs >= rhs).
    code.cmpleps(rhs_result, lhs);

    ctx.reg_alloc.DefineValue(inst, rhs_result);
}
|
||||
|
||||
// Emits a packed double-precision "greater than or equal" comparison of args[0] against args[1].
// The value defined for `inst` is whatever CMPLEPD leaves in the scratch register:
// a per-lane mask that is set where args[1] <= args[0], i.e. where args[0] >= args[1].
void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // The second operand is taken as a scratch register because the compare
    // overwrites its destination, which then doubles as the result register.
    const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(operands[0]);
    const Xbyak::Xmm rhs_result = ctx.reg_alloc.UseScratchXmm(operands[1]);

    // Operands are swapped on purpose: (rhs <= lhs) is equivalent to (lhs >= rhs).
    code.cmplepd(rhs_result, lhs);

    ctx.reg_alloc.DefineValue(inst, rhs_result);
}
|
||||
|
||||
// Forwards to EmitVectorOperation32, handing it Xbyak's `mulps` emitter as the
// operation to apply for this instruction.
void EmitX64::EmitFPVectorMul32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation32(
        code,
        ctx,
        inst,
        &Xbyak::CodeGenerator::mulps);
}
|
||||
|
|
|
@ -1514,6 +1514,28 @@ U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b) {
|
|||
return {};
|
||||
}
|
||||
|
||||
/// Appends a vector floating-point greater-than instruction comparing `a` with `b`.
///
/// @param esize Element size in bits; only 32 and 64 have a matching opcode.
/// @param a     First operand vector.
/// @param b     Second operand vector.
/// @return The U128 result of the emitted FPVectorGreater32/64 instruction.
///         Any other `esize` reaches UNREACHABLE().
U128 IREmitter::FPVectorGreater(size_t esize, const U128& a, const U128& b) {
    if (esize == 32) {
        return Inst<U128>(Opcode::FPVectorGreater32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::FPVectorGreater64, a, b);
    }

    // No opcode exists for any other element size.
    UNREACHABLE();
    return {};
}
|
||||
|
||||
/// Appends a vector floating-point greater-than-or-equal instruction comparing `a` with `b`.
///
/// @param esize Element size in bits; only 32 and 64 have a matching opcode.
/// @param a     First operand vector.
/// @param b     Second operand vector.
/// @return The U128 result of the emitted FPVectorGreaterEqual32/64 instruction.
///         Any other `esize` reaches UNREACHABLE().
U128 IREmitter::FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b) {
    if (esize == 32) {
        return Inst<U128>(Opcode::FPVectorGreaterEqual32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::FPVectorGreaterEqual64, a, b);
    }

    // No opcode exists for any other element size.
    UNREACHABLE();
    return {};
}
|
||||
|
||||
U128 IREmitter::FPVectorMul(size_t esize, const U128& a, const U128& b) {
|
||||
switch (esize) {
|
||||
case 32:
|
||||
|
|
|
@ -275,6 +275,8 @@ public:
|
|||
U128 FPVectorAdd(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorDiv(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorEqual(size_t esize, const U128& a, const U128& b);
|
||||
// Vector floating-point comparison a > b. `esize` is the element size in bits
// (the implementation handles 32 and 64 only).
U128 FPVectorGreater(size_t esize, const U128& a, const U128& b);
|
||||
// Vector floating-point comparison a >= b. `esize` is the element size in bits
// (the implementation handles 32 and 64 only).
U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorS32ToSingle(const U128& a);
|
||||
|
|
|
@ -400,6 +400,10 @@ OPCODE(FPVectorDiv32, T::U128, T::U128, T::U
|
|||
OPCODE(FPVectorDiv64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorEqual32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorEqual64, T::U128, T::U128, T::U128 )
|
||||
// Vector floating-point greater-than / greater-than-or-equal comparisons, one
// opcode per element size (32 and 64 bits). All type columns are U128.
OPCODE(FPVectorGreater32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorGreater64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorGreaterEqual32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorGreaterEqual64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorMul32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorMul64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorS32ToSingle, T::U128, T::U128 )
|
||||
|
|
Loading…
Reference in a new issue