From 2d6c064e66bac4cb871aa26a12066441a8852008 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 Dec 2018 16:50:36 -0300 Subject: [PATCH] shader_decode: Improve zero flag implementation --- src/video_core/shader/decode/arithmetic.cpp | 15 +++------ .../shader/decode/arithmetic_immediate.cpp | 6 ++-- .../shader/decode/arithmetic_integer.cpp | 31 +++++++++---------- .../decode/arithmetic_integer_immediate.cpp | 18 ++++------- src/video_core/shader/decode/bfe.cpp | 1 + src/video_core/shader/decode/bfi.cpp | 4 +-- src/video_core/shader/decode/conversion.cpp | 11 ++----- src/video_core/shader/decode/ffma.cpp | 3 +- src/video_core/shader/decode/float_set.cpp | 11 +++---- .../shader/decode/predicate_set_register.cpp | 6 ++++ src/video_core/shader/decode/shift.cpp | 14 ++++----- src/video_core/shader/decode/video.cpp | 5 +-- src/video_core/shader/decode/xmad.cpp | 1 + src/video_core/shader/shader_ir.cpp | 19 ++++++++++++ src/video_core/shader/shader_ir.h | 9 ++++-- 15 files changed, 79 insertions(+), 75 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index ef846bd9af..926abcc8e3 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -45,8 +45,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG( instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMUL is not implemented"); op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); @@ -75,21 +73,20 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FADD is not implemented"); - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -126,9 +123,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { case OpCode::Id::FMNMX_C: case OpCode::Id::FMNMX_R: case OpCode::Id::FMNMX_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMNMX is not implemented"); - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); @@ -136,9 +130,10 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); + const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - SetRegister(bb, instr.gpr0, - Operation(OperationCode::Select, NO_PRECISE, condition, min, max)); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::RRO_C: diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 996b2537a3..1c6da94b48 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -22,24 +22,22 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { break; } case OpCode::Id::FMUL32_IMM: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FMUL32 is not implemented"); Node value = Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); value = GetSaturatedFloat(value, instr.fmul32.saturate); + SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::FADD32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FADD32I is not implemented"); const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, instr.fadd32i.negate_a); const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, instr.fadd32i.negate_b); const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); + SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); SetRegister(bb, instr.gpr0, value); break; } diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 931e0fa1d1..edd1695f41 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -34,22 +34,20 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD is not implemented"); UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b)); + const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + + SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::IADD3_C: case OpCode::Id::IADD3_R: case OpCode::Id::IADD3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD3 is not implemented"); - Node op_c = GetRegister(instr.gpr39); const auto ApplyHeight = [&](IAdd3Height height, Node value) { @@ -100,6 +98,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); }(); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -115,6 +114,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { const Node shift = Immediate(static_cast(instr.alu_integer.shift_amount)); const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); + + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -139,24 +140,19 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { case OpCode::Id::LOP_C: case OpCode::Id::LOP_R: case OpCode::Id::LOP_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP is not implemented"); - if (instr.alu.lop.invert_a) op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); if (instr.alu.lop.invert_b) op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, + instr.generates_cc); break; } case OpCode::Id::LOP3_C: case OpCode::Id::LOP3_R: case OpCode::Id::LOP3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP3 is not implemented"); - const Node op_c = GetRegister(instr.gpr39); const Node lut = [&]() { if (opcode->get().GetId() == OpCode::Id::LOP3_R) { @@ -166,15 +162,13 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { } }(); - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut); + WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); break; } case OpCode::Id::IMNMX_C: case OpCode::Id::IMNMX_R: case OpCode::Id::IMNMX_IMM: { UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IMNMX is not implemented"); const bool is_signed = instr.imnmx.is_signed; @@ -182,6 +176,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); + + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -247,7 +243,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { } void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut) { + Node imm_lut, bool sets_cc) { constexpr u32 lop_iterations = 32; const Node one = Immediate(1); const Node two = Immediate(2); @@ -284,6 +280,7 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No } } + SetInternalFlagsFromInteger(bb, value, sets_cc); SetRegister(bb, dest, value); } diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 3b8a60c6b4..3cbaeeaf5d 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -25,20 +25,17 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in IADD32I is not implemented"); UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + + SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::LOP32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in LOP32I is not implemented"); - if (instr.alu.lop32i.invert_a) op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); @@ -46,8 +43,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, - Tegra::Shader::PredicateResultMode::None, - Tegra::Shader::Pred::UnusedIndex); + PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); break; } default: @@ -60,7 +56,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, Node op_a, Node op_b, PredicateResultMode predicate_mode, - Pred predicate) { + Pred predicate, bool sets_cc) { const Node result = [&]() { switch (logic_op) { case LogicOperation::And: @@ -77,11 +73,9 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation } }(); - if (dest != Register::ZeroIndex) { - SetRegister(bb, dest, result); - } + SetInternalFlagsFromInteger(bb, result, sets_cc); + SetRegister(bb, dest, result); - using Tegra::Shader::PredicateResultMode; // Write the predicate value depending on the predicate mode. switch (predicate_mode) { case PredicateResultMode::None: diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 6532a3bce2..d3244fd403 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -35,6 +35,7 @@ u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) { const Node outer_shift = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); + SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); SetRegister(bb, instr.gpr0, outer_shift); break; } diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index b0d8d9eba0..ddb1872c64 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -16,8 +16,6 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.generates_cc); - const auto [base, packed_shift] = [&]() -> std::tuple { switch (opcode->get().GetId()) { case OpCode::Id::BFI_IMM_R: @@ -33,6 +31,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { const Node value = Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); + + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); return pc; diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 791f03fe0c..d5c75e8eb0 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -33,15 +33,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); } + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); - - if (instr.generates_cc) { - const Node zero_condition = - SignedOperation(OperationCode::LogicalIEqual, output_signed, value, Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, zero_condition); - LOG_WARNING(HW_GPU, "I2I Condition codes implementation is incomplete."); - } - break; } case OpCode::Id::I2F_R: @@ -64,6 +57,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -103,6 +97,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index a17ebd6db1..f3ab3d2e84 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -21,8 +21,6 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FFMA is not implemented"); const Node op_a = GetRegister(instr.gpr8); @@ -52,6 +50,7 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); value = GetSaturatedFloat(value, instr.alu.saturate_d); + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); return pc; diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index b69d94c2ea..8e266cc4e2 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -45,13 +45,12 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) { const Node value = Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - SetRegister(bb, instr.gpr0, value); - - if (instr.generates_cc) { - const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, is_zero); - LOG_WARNING(HW_GPU, "FSET condition code is incomplete"); + if (instr.fset.bf) { + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + } else { + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); } + SetRegister(bb, instr.gpr0, value); return pc; } diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 6c58496c2d..58d20ceb57 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -32,6 +32,12 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) { const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); const Node value = Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); + + if (instr.pset.bf) { + SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + } else { + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); + } SetRegister(bb, instr.gpr0, value); return pc; diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 3ba039d21a..e8ffdb818e 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -31,22 +31,20 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in SHR is not implemented"); - const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, PRECISE, op_a, op_b); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in SHL is not implemented"); - SetRegister(bb, instr.gpr0, - Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b)); + case OpCode::Id::SHL_IMM: { + const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); + SetRegister(bb, instr.gpr0, value); break; + } default: UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); } diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index b491fbadbb..609b3a257a 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -38,9 +38,6 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::VMAD: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in VMAD is not implemented"); - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; const Node op_c = GetRegister(instr.gpr39); @@ -53,8 +50,8 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); } + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); - break; } case OpCode::Id::VSETP: { diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 3e37aee4a7..88f1be27d5 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -86,6 +86,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); } + SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); SetRegister(bb, instr.gpr0, sum); return pc; diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 4474af7c40..d7747103e9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -356,6 +357,24 @@ void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) { SetRegister(bb, Register::ZeroIndex + 1 + id, value); } +void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { + if (!sets_cc) { + return; + } + const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); + SetInternalFlag(bb, InternalFlag::Zero, zerop); + LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); +} + +void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { + if (!sets_cc) { + return; + } + const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); + SetInternalFlag(bb, InternalFlag::Zero, zerop); + LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); +} + Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), Immediate(bits)); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0c8f4a265e..47f460bcfa 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -649,6 +649,11 @@ private: /// Sets a temporal. Internally it uses a post-RZ register void SetTemporal(BasicBlock& bb, u32 id, Node value); + /// Sets internal flags from a float + void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); + /// Sets internal flags from an integer + void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); + /// Conditionally absolute/negated float. Absolute is applied first Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); /// Conditionally saturates a float @@ -725,9 +730,9 @@ private: void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate); + Tegra::Shader::Pred predicate, bool sets_cc); void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut); + Node op_c, Node imm_lut, bool sets_cc); template Node Operation(OperationCode code, const T*... operands) {