forked from suyu/suyu
Shaders: Fix multiplications between 0.0 and inf
The PICA200 semantics for multiplication are such that multiplying inf by exactly 0.0 yields 0.0, rather than the NaN that IEEE 754 specifies. Games rely on this behavior. Fixes #1024 (missing OoT interface items).
This commit is contained in:
parent
082b74fa24
commit
630a850d4d
3 changed files with 60 additions and 42 deletions
|
@ -1021,12 +1021,20 @@ struct float24 {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static float24 Zero() {
|
||||||
|
return FromFloat32(0.f);
|
||||||
|
}
|
||||||
|
|
||||||
// Not recommended for anything but logging
|
// Not recommended for anything but logging
|
||||||
float ToFloat32() const {
|
float ToFloat32() const {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
float24 operator * (const float24& flt) const {
|
float24 operator * (const float24& flt) const {
|
||||||
|
if ((this->value == 0.f && flt.value == flt.value) ||
|
||||||
|
(flt.value == 0.f && this->value == this->value))
|
||||||
|
// PICA gives 0 instead of NaN when multiplying by inf
|
||||||
|
return Zero();
|
||||||
return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
|
return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1043,7 +1051,11 @@ struct float24 {
|
||||||
}
|
}
|
||||||
|
|
||||||
float24& operator *= (const float24& flt) {
|
float24& operator *= (const float24& flt) {
|
||||||
value *= flt.ToFloat32();
|
if ((this->value == 0.f && flt.value == flt.value) ||
|
||||||
|
(flt.value == 0.f && this->value == this->value))
|
||||||
|
// PICA gives 0 instead of NaN when multiplying by inf
|
||||||
|
*this = Zero();
|
||||||
|
else value *= flt.ToFloat32();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -246,6 +246,19 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
|
||||||
|
MOVAPS(scratch, R(src1));
|
||||||
|
CMPPS(scratch, R(src2), CMP_ORD);
|
||||||
|
|
||||||
|
MULPS(src1, R(src2));
|
||||||
|
|
||||||
|
MOVAPS(src2, R(src1));
|
||||||
|
CMPPS(src2, R(src2), CMP_UNORD);
|
||||||
|
|
||||||
|
XORPS(scratch, R(src2));
|
||||||
|
ANDPS(src1, R(scratch));
|
||||||
|
}
|
||||||
|
|
||||||
void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
|
void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
|
||||||
// Note: NXOR is used below to check for equality
|
// Note: NXOR is used below to check for equality
|
||||||
switch (instr.flow_control.op) {
|
switch (instr.flow_control.op) {
|
||||||
|
@ -309,21 +322,17 @@ void JitCompiler::Compile_DP3(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
||||||
DPPS(SRC1, R(SRC2), 0x7f);
|
|
||||||
} else {
|
|
||||||
MULPS(SRC1, R(SRC2));
|
|
||||||
|
|
||||||
MOVAPS(SRC2, R(SRC1));
|
MOVAPS(SRC2, R(SRC1));
|
||||||
SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
|
SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
|
||||||
|
|
||||||
MOVAPS(SRC3, R(SRC1));
|
MOVAPS(SRC3, R(SRC1));
|
||||||
SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
|
SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
|
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
ADDPS(SRC1, R(SRC2));
|
ADDPS(SRC1, R(SRC2));
|
||||||
ADDPS(SRC1, R(SRC3));
|
ADDPS(SRC1, R(SRC3));
|
||||||
}
|
|
||||||
|
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
@ -332,19 +341,15 @@ void JitCompiler::Compile_DP4(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
||||||
DPPS(SRC1, R(SRC2), 0xff);
|
|
||||||
} else {
|
|
||||||
MULPS(SRC1, R(SRC2));
|
|
||||||
|
|
||||||
MOVAPS(SRC2, R(SRC1));
|
MOVAPS(SRC2, R(SRC1));
|
||||||
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
||||||
ADDPS(SRC1, R(SRC2));
|
ADDPS(SRC1, R(SRC2));
|
||||||
|
|
||||||
MOVAPS(SRC2, R(SRC1));
|
MOVAPS(SRC2, R(SRC1));
|
||||||
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
||||||
ADDPS(SRC1, R(SRC2));
|
ADDPS(SRC1, R(SRC2));
|
||||||
}
|
|
||||||
|
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
@ -361,24 +366,23 @@ void JitCompiler::Compile_DPH(Instruction instr) {
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
if (Common::GetCPUCaps().sse4_1) {
|
||||||
// Set 4th component to 1.0
|
// Set 4th component to 1.0
|
||||||
BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
|
BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
|
||||||
DPPS(SRC1, R(SRC2), 0xff);
|
|
||||||
} else {
|
} else {
|
||||||
// Reverse to set the 4th component to 1.0
|
// Reverse to set the 4th component to 1.0
|
||||||
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
|
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
|
||||||
MOVSS(SRC1, R(ONE));
|
MOVSS(SRC1, R(ONE));
|
||||||
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
|
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
|
||||||
|
|
||||||
MULPS(SRC1, R(SRC2));
|
|
||||||
|
|
||||||
MOVAPS(SRC2, R(SRC1));
|
|
||||||
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
|
||||||
ADDPS(SRC1, R(SRC2));
|
|
||||||
|
|
||||||
MOVAPS(SRC2, R(SRC1));
|
|
||||||
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
|
||||||
ADDPS(SRC1, R(SRC2));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
||||||
|
|
||||||
|
MOVAPS(SRC2, R(SRC1));
|
||||||
|
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
||||||
|
ADDPS(SRC1, R(SRC2));
|
||||||
|
|
||||||
|
MOVAPS(SRC2, R(SRC1));
|
||||||
|
SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
||||||
|
ADDPS(SRC1, R(SRC2));
|
||||||
|
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -417,7 +421,7 @@ void JitCompiler::Compile_LG2(Instruction instr) {
|
||||||
void JitCompiler::Compile_MUL(Instruction instr) {
|
void JitCompiler::Compile_MUL(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
|
||||||
MULPS(SRC1, R(SRC2));
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -635,12 +639,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
|
Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Common::GetCPUCaps().fma) {
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
||||||
VFMADD213PS(SRC1, SRC2, R(SRC3));
|
ADDPS(SRC1, R(SRC3));
|
||||||
} else {
|
|
||||||
MULPS(SRC1, R(SRC2));
|
|
||||||
ADDPS(SRC1, R(SRC3));
|
|
||||||
}
|
|
||||||
|
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,6 +68,12 @@ private:
|
||||||
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
|
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
|
||||||
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
|
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
|
||||||
|
* zero by inf. Clobbers `src2` and `scratch`.
|
||||||
|
*/
|
||||||
|
void Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch);
|
||||||
|
|
||||||
void Compile_EvaluateCondition(Instruction instr);
|
void Compile_EvaluateCondition(Instruction instr);
|
||||||
void Compile_UniformCondition(Instruction instr);
|
void Compile_UniformCondition(Instruction instr);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue