emit_x64_floating_point: Correct FP{Max,Min}{32,64} implementations for -0/+0
This commit is contained in:
parent
2080a51f41
commit
6541ec064d
1 changed files with 84 additions and 12 deletions
|
@ -4,6 +4,8 @@
|
||||||
* General Public License version 2 or any later version.
|
* General Public License version 2 or any later version.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
#include "backend_x64/abi.h"
|
#include "backend_x64/abi.h"
|
||||||
#include "backend_x64/block_of_code.h"
|
#include "backend_x64/block_of_code.h"
|
||||||
#include "backend_x64/emit_x64.h"
|
#include "backend_x64/emit_x64.h"
|
||||||
|
@ -213,7 +215,8 @@ static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
|
template <typename Function>
|
||||||
|
static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
@ -229,7 +232,11 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
||||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
||||||
end = PreProcessNaNs32(code, result, operand);
|
end = PreProcessNaNs32(code, result, operand);
|
||||||
}
|
}
|
||||||
(code.*fn)(result, operand);
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
(code.*fn)(result, operand);
|
||||||
|
} else {
|
||||||
|
fn(result, operand);
|
||||||
|
}
|
||||||
if (ctx.FPSCR_FTZ()) {
|
if (ctx.FPSCR_FTZ()) {
|
||||||
FlushToZero32(code, result, gpr_scratch);
|
FlushToZero32(code, result, gpr_scratch);
|
||||||
}
|
}
|
||||||
|
@ -243,7 +250,8 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
|
template <typename Function>
|
||||||
|
static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
@ -259,7 +267,11 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
||||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
||||||
end = PreProcessNaNs64(code, result, operand);
|
end = PreProcessNaNs64(code, result, operand);
|
||||||
}
|
}
|
||||||
(code.*fn)(result, operand);
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
(code.*fn)(result, operand);
|
||||||
|
} else {
|
||||||
|
fn(result, operand);
|
||||||
|
}
|
||||||
if (ctx.FPSCR_FTZ()) {
|
if (ctx.FPSCR_FTZ()) {
|
||||||
FlushToZero64(code, result, gpr_scratch);
|
FlushToZero64(code, result, gpr_scratch);
|
||||||
}
|
}
|
||||||
|
@ -273,7 +285,8 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
|
template <typename Function>
|
||||||
|
static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
@ -287,7 +300,11 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
|
||||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
||||||
end = ProcessNaN32(code, result);
|
end = ProcessNaN32(code, result);
|
||||||
}
|
}
|
||||||
(code.*fn)(result, result);
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
(code.*fn)(result, result);
|
||||||
|
} else {
|
||||||
|
fn(result);
|
||||||
|
}
|
||||||
if (ctx.FPSCR_FTZ()) {
|
if (ctx.FPSCR_FTZ()) {
|
||||||
FlushToZero32(code, result, gpr_scratch);
|
FlushToZero32(code, result, gpr_scratch);
|
||||||
}
|
}
|
||||||
|
@ -301,7 +318,8 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
|
template <typename Function>
|
||||||
|
static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
|
@ -315,7 +333,11 @@ static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
|
||||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
||||||
end = ProcessNaN64(code, result);
|
end = ProcessNaN64(code, result);
|
||||||
}
|
}
|
||||||
(code.*fn)(result, result);
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
(code.*fn)(result, result);
|
||||||
|
} else {
|
||||||
|
fn(result);
|
||||||
|
}
|
||||||
if (ctx.FPSCR_FTZ()) {
|
if (ctx.FPSCR_FTZ()) {
|
||||||
FlushToZero64(code, result, gpr_scratch);
|
FlushToZero64(code, result, gpr_scratch);
|
||||||
}
|
}
|
||||||
|
@ -382,19 +404,69 @@ void EmitX64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMax32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMax32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::maxss);
|
FPThreeOp32(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
|
||||||
|
Xbyak::Label normal, end;
|
||||||
|
code.ucomiss(result, operand);
|
||||||
|
code.jnz(normal);
|
||||||
|
if (!ctx.AccurateNaN()) {
|
||||||
|
Xbyak::Label notnan;
|
||||||
|
code.jnp(notnan);
|
||||||
|
code.addss(result, operand);
|
||||||
|
code.jmp(end);
|
||||||
|
code.L(notnan);
|
||||||
|
}
|
||||||
|
code.andps(result, operand);
|
||||||
|
code.jmp(end);
|
||||||
|
code.L(normal);
|
||||||
|
code.maxss(result, operand);
|
||||||
|
code.L(end);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMax64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMax64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::maxsd);
|
FPThreeOp64(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
|
||||||
|
Xbyak::Label normal, end;
|
||||||
|
code.ucomisd(result, operand);
|
||||||
|
code.jnz(normal);
|
||||||
|
if (!ctx.AccurateNaN()) {
|
||||||
|
Xbyak::Label notnan;
|
||||||
|
code.jnp(notnan);
|
||||||
|
code.addsd(result, operand);
|
||||||
|
code.jmp(end);
|
||||||
|
code.L(notnan);
|
||||||
|
}
|
||||||
|
code.andps(result, operand);
|
||||||
|
code.jmp(end);
|
||||||
|
code.L(normal);
|
||||||
|
code.maxsd(result, operand);
|
||||||
|
code.L(end);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMin32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMin32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::minss);
|
FPThreeOp32(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
|
||||||
|
Xbyak::Label normal, end;
|
||||||
|
code.ucomiss(result, operand);
|
||||||
|
code.jnz(normal);
|
||||||
|
code.orps(result, operand);
|
||||||
|
code.jmp(end);
|
||||||
|
code.L(normal);
|
||||||
|
code.minss(result, operand);
|
||||||
|
code.L(end);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMin64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMin64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::minsd);
|
FPThreeOp64(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
|
||||||
|
Xbyak::Label normal, end;
|
||||||
|
code.ucomisd(result, operand);
|
||||||
|
code.jnz(normal);
|
||||||
|
code.orps(result, operand);
|
||||||
|
code.jmp(end);
|
||||||
|
code.L(normal);
|
||||||
|
code.minsd(result, operand);
|
||||||
|
code.L(end);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
Loading…
Reference in a new issue