emit_x64_floating_point: DenormalsAreZero is redundant as hardware already does DAZ
Exceptions: F{MIN,MAX}{,NM}
This commit is contained in:
parent
de9d8c461c
commit
8ef195db3c
1 changed files with 15 additions and 44 deletions
|
@ -205,11 +205,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
|
||||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
|
|
||||||
|
|
||||||
if (ctx.FPSCR_FTZ()) {
|
|
||||||
DenormalsAreZero<fsize>(code, result, gpr_scratch);
|
|
||||||
}
|
|
||||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
||||||
end = ProcessNaN<fsize>(code, result);
|
end = ProcessNaN<fsize>(code, result);
|
||||||
}
|
}
|
||||||
|
@ -228,8 +224,13 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum class CallDenormalsAreZero {
|
||||||
|
Yes,
|
||||||
|
No,
|
||||||
|
};
|
||||||
|
|
||||||
template <size_t fsize, typename PreprocessFunction, typename Function>
|
template <size_t fsize, typename PreprocessFunction, typename Function>
|
||||||
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unused]] PreprocessFunction preprocess, Function fn) {
|
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unused]] PreprocessFunction preprocess, Function fn, CallDenormalsAreZero call_denormals_are_zero = CallDenormalsAreZero::No) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
@ -238,7 +239,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unus
|
||||||
Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
|
Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
if (ctx.FPSCR_FTZ()) {
|
if (ctx.FPSCR_FTZ() && call_denormals_are_zero == CallDenormalsAreZero::Yes) {
|
||||||
DenormalsAreZero<fsize>(code, result, gpr_scratch);
|
DenormalsAreZero<fsize>(code, result, gpr_scratch);
|
||||||
DenormalsAreZero<fsize>(code, operand, gpr_scratch);
|
DenormalsAreZero<fsize>(code, operand, gpr_scratch);
|
||||||
}
|
}
|
||||||
|
@ -264,38 +265,8 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unus
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t fsize, typename Function>
|
template <size_t fsize, typename Function>
|
||||||
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, CallDenormalsAreZero call_denormals_are_zero = CallDenormalsAreZero::No) {
|
||||||
FPThreeOp<fsize>(code, ctx, inst, nullptr, fn);
|
FPThreeOp<fsize>(code, ctx, inst, nullptr, fn, call_denormals_are_zero);
|
||||||
}
|
|
||||||
|
|
||||||
template <size_t fsize, typename Function, typename NaNHandler>
|
|
||||||
void FPFourOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, NaNHandler nan_handler) {
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
Xbyak::Label end;
|
|
||||||
|
|
||||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
|
||||||
Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
|
|
||||||
Xbyak::Xmm operand3 = ctx.reg_alloc.UseScratchXmm(args[2]);
|
|
||||||
Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
|
|
||||||
|
|
||||||
if (ctx.FPSCR_FTZ()) {
|
|
||||||
DenormalsAreZero<fsize>(code, result, gpr_scratch);
|
|
||||||
DenormalsAreZero<fsize>(code, operand2, gpr_scratch);
|
|
||||||
DenormalsAreZero<fsize>(code, operand3, gpr_scratch);
|
|
||||||
}
|
|
||||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
|
||||||
PreProcessNaNs<fsize>(code, ctx, result, operand2, operand3, end, nan_handler);
|
|
||||||
}
|
|
||||||
fn(result, operand2, operand3);
|
|
||||||
if (ctx.FPSCR_DN()) {
|
|
||||||
DefaultNaN<fsize>(code, result);
|
|
||||||
} else if (ctx.AccurateNaN()) {
|
|
||||||
PostProcessNaNs<fsize>(code, result, operand2);
|
|
||||||
}
|
|
||||||
code.L(end);
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
@ -400,7 +371,7 @@ static void EmitFPMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.L(equal);
|
code.L(equal);
|
||||||
code.andps(result, operand);
|
code.andps(result, operand);
|
||||||
code.L(end);
|
code.L(end);
|
||||||
});
|
}, CallDenormalsAreZero::Yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMax32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMax32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
@ -442,7 +413,7 @@ void EmitX64::EmitFPMaxNumeric32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.jnz(normal);
|
code.jnz(normal);
|
||||||
code.andps(operand, result);
|
code.andps(operand, result);
|
||||||
code.L(normal);
|
code.L(normal);
|
||||||
}, &Xbyak::CodeGenerator::maxss);
|
}, &Xbyak::CodeGenerator::maxss, CallDenormalsAreZero::Yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMaxNumeric64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMaxNumeric64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
@ -476,7 +447,7 @@ void EmitX64::EmitFPMaxNumeric64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.jnz(normal);
|
code.jnz(normal);
|
||||||
code.andps(operand, result);
|
code.andps(operand, result);
|
||||||
code.L(normal);
|
code.L(normal);
|
||||||
}, &Xbyak::CodeGenerator::maxsd);
|
}, &Xbyak::CodeGenerator::maxsd, CallDenormalsAreZero::Yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t fsize>
|
template<size_t fsize>
|
||||||
|
@ -527,7 +498,7 @@ static void EmitFPMin(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.L(equal);
|
code.L(equal);
|
||||||
code.orps(result, operand);
|
code.orps(result, operand);
|
||||||
code.L(end);
|
code.L(end);
|
||||||
});
|
}, CallDenormalsAreZero::Yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMin32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMin32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
@ -569,7 +540,7 @@ void EmitX64::EmitFPMinNumeric32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.jnz(normal);
|
code.jnz(normal);
|
||||||
code.orps(operand, result);
|
code.orps(operand, result);
|
||||||
code.L(normal);
|
code.L(normal);
|
||||||
}, &Xbyak::CodeGenerator::minss);
|
}, &Xbyak::CodeGenerator::minss, CallDenormalsAreZero::Yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMinNumeric64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMinNumeric64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
@ -603,7 +574,7 @@ void EmitX64::EmitFPMinNumeric64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.jnz(normal);
|
code.jnz(normal);
|
||||||
code.orps(operand, result);
|
code.orps(operand, result);
|
||||||
code.L(normal);
|
code.L(normal);
|
||||||
}, &Xbyak::CodeGenerator::minsd);
|
}, &Xbyak::CodeGenerator::minsd, CallDenormalsAreZero::Yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
Loading…
Reference in a new issue