1
1
Fork 0
forked from suyu/suyu

Merge pull request #4391 from lioncash/nrvo

video_core: Allow copy elision to take place where applicable
This commit is contained in:
bunnei 2020-07-24 06:33:09 -07:00 committed by GitHub
commit f650cf8a9a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 26 additions and 26 deletions

View file

@ -1704,7 +1704,7 @@ std::string ARBDecompiler::HCastFloat(Operation operation) {
} }
std::string ARBDecompiler::HUnpack(Operation operation) { std::string ARBDecompiler::HUnpack(Operation operation) {
const std::string operand = Visit(operation[0]); std::string operand = Visit(operation[0]);
switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
case Tegra::Shader::HalfType::H0_H1: case Tegra::Shader::HalfType::H0_H1:
return operand; return operand;
@ -2054,7 +2054,7 @@ std::string ARBDecompiler::InvocationId(Operation) {
std::string ARBDecompiler::YNegate(Operation) { std::string ARBDecompiler::YNegate(Operation) {
LOG_WARNING(Render_OpenGL, "(STUBBED)"); LOG_WARNING(Render_OpenGL, "(STUBBED)");
const std::string temporary = AllocTemporary(); std::string temporary = AllocTemporary();
AddLine("MOV.F {}, 1;", temporary); AddLine("MOV.F {}, 1;", temporary);
return temporary; return temporary;
} }

View file

@ -126,7 +126,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
entry.graphics_info, entry.compute_info}; entry.graphics_info, entry.compute_info};
const auto registry = std::make_shared<Registry>(entry.type, info); auto registry = std::make_shared<Registry>(entry.type, info);
for (const auto& [address, value] : entry.keys) { for (const auto& [address, value] : entry.keys) {
const auto [buffer, offset] = address; const auto [buffer, offset] = address;
registry->InsertKey(buffer, offset, value); registry->InsertKey(buffer, offset, value);

View file

@ -1919,7 +1919,7 @@ private:
Expression Comparison(Operation operation) { Expression Comparison(Operation operation) {
static_assert(!unordered || type == Type::Float); static_assert(!unordered || type == Type::Float);
const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
// GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's

View file

@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
const Node value = [&]() { const Node value = [&] {
const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
if (opcode->get().GetId() != OpCode::Id::IADD3_R) { if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
} }
const Node shifted = [&]() { const Node shifted = [&] {
switch (instr.iadd3.mode) { switch (instr.iadd3.mode) {
case Tegra::Shader::IAdd3Mode::RightShift: case Tegra::Shader::IAdd3Mode::RightShift:
// TODO(tech4me): According to // TODO(tech4me): According to

View file

@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
return pc; return pc;
} }
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
Tegra::Shader::VideoType type, u64 byte_height) { u64 byte_height) {
if (!is_chunk) { if (!is_chunk) {
return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
} }
const Node zero = Immediate(0);
switch (type) { switch (type) {
case Tegra::Shader::VideoType::Size16_Low: case VideoType::Size16_Low:
return BitfieldExtract(op, 0, 16); return BitfieldExtract(op, 0, 16);
case Tegra::Shader::VideoType::Size16_High: case VideoType::Size16_High:
return BitfieldExtract(op, 16, 16); return BitfieldExtract(op, 16, 16);
case Tegra::Shader::VideoType::Size32: case VideoType::Size32:
// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
// (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
UNIMPLEMENTED(); UNIMPLEMENTED();
return zero; return Immediate(0);
case Tegra::Shader::VideoType::Invalid: case VideoType::Invalid:
UNREACHABLE_MSG("Invalid instruction encoding"); UNREACHABLE_MSG("Invalid instruction encoding");
return zero; return Immediate(0);
default: default:
UNREACHABLE(); UNREACHABLE();
return zero; return Immediate(0);
} }
} }

View file

@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
SetTemporary(bb, 0, product); SetTemporary(bb, 0, product);
product = GetTemporary(0); product = GetTemporary(0);
const Node original_c = op_c; Node original_c = op_c;
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
op_c = [&]() { op_c = [&] {
switch (set_mode) { switch (set_mode) {
case Tegra::Shader::XmadMode::None: case Tegra::Shader::XmadMode::None:
return original_c; return original_c;
case Tegra::Shader::XmadMode::CLo: case Tegra::Shader::XmadMode::CLo:
return BitfieldExtract(original_c, 0, 16); return BitfieldExtract(std::move(original_c), 0, 16);
case Tegra::Shader::XmadMode::CHi: case Tegra::Shader::XmadMode::CHi:
return BitfieldExtract(original_c, 16, 16); return BitfieldExtract(std::move(original_c), 16, 16);
case Tegra::Shader::XmadMode::CBcc: { case Tegra::Shader::XmadMode::CBcc: {
const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
original_b, Immediate(16)); original_b, Immediate(16));
return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
std::move(shifted_b));
} }
case Tegra::Shader::XmadMode::CSfu: { case Tegra::Shader::XmadMode::CSfu: {
const Node comp_a = const Node comp_a =

View file

@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
} }
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
const Node node = MakeNode<InternalFlagNode>(flag); Node node = MakeNode<InternalFlagNode>(flag);
if (negated) { if (negated) {
return Operation(OperationCode::LogicalNegate, node); return Operation(OperationCode::LogicalNegate, std::move(node));
} }
return node; return node;
} }