// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
2018-12-13 20:59:28 +01:00
|
|
|
#include <algorithm>
|
2018-12-21 05:27:47 +01:00
|
|
|
#include <vector>
|
2018-12-29 06:44:54 +01:00
|
|
|
#include <fmt/format.h>
|
2018-12-21 05:27:47 +01:00
|
|
|
|
2018-12-20 23:09:21 +01:00
|
|
|
#include "common/assert.h"
|
|
|
|
#include "common/common_types.h"
|
2019-04-02 04:03:32 +02:00
|
|
|
#include "common/logging/log.h"
|
2018-12-20 23:09:21 +01:00
|
|
|
#include "video_core/engines/shader_bytecode.h"
|
|
|
|
#include "video_core/shader/shader_ir.h"
|
|
|
|
|
|
|
|
namespace VideoCommon::Shader {
|
|
|
|
|
2018-12-21 04:05:42 +01:00
|
|
|
using Tegra::Shader::Attribute;
|
2018-12-20 23:09:21 +01:00
|
|
|
using Tegra::Shader::Instruction;
|
|
|
|
using Tegra::Shader::OpCode;
|
2018-12-21 04:05:42 +01:00
|
|
|
using Tegra::Shader::Register;
|
2018-12-20 23:09:21 +01:00
|
|
|
|
2019-02-07 04:05:41 +01:00
|
|
|
namespace {
|
|
|
|
u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
|
|
|
|
switch (uniform_type) {
|
|
|
|
case Tegra::Shader::UniformType::Single:
|
|
|
|
return 1;
|
|
|
|
case Tegra::Shader::UniformType::Double:
|
|
|
|
return 2;
|
|
|
|
case Tegra::Shader::UniformType::Quad:
|
|
|
|
case Tegra::Shader::UniformType::UnsignedQuad:
|
|
|
|
return 4;
|
|
|
|
default:
|
|
|
|
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
2019-01-30 06:09:40 +01:00
|
|
|
/// Decodes one memory-class instruction (attribute, constant-buffer, local and
/// global memory loads/stores) at program counter `pc`, appending the generated
/// IR nodes to basic block `bb`.
/// @param bb Basic block that receives the emitted IR nodes.
/// @param pc Program counter of the instruction to decode.
/// @return The (unmodified) program counter.
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::LD_A: {
        // Load one or more input attribute words into gpr0, gpr0+1, ...
        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
                                          Tegra::Shader::IpaSampleMode::Default};

        // Mutable cursor over (attribute index, vec4 element); advanced by the lambda below.
        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto LoadNextElement = [&](u32 reg_offset) {
            // gpr39 is forwarded as the attribute buffer selector
            // (presumably meaningful for geometry stages — confirm in GetInputAttribute).
            const Node buffer = GetRegister(instr.gpr39);
            const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
                                                     next_element, input_mode, buffer);

            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);

            // Load the next attribute element into the following register. If the element
            // to load goes beyond the vec4 size, load the first element of the next
            // attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };

        // The size field encodes (number of words - 1).
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            LoadNextElement(reg_offset);
        }
        break;
    }
    case OpCode::Id::LD_C: {
        // Indirectly-indexed constant buffer load (index register in gpr8).
        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);

        Node index = GetRegister(instr.gpr8);

        const Node op_a =
            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);

        switch (instr.ld_c.type.Value()) {
        case Tegra::Shader::UniformType::Single:
            SetRegister(bb, instr.gpr0, op_a);
            break;

        case Tegra::Shader::UniformType::Double: {
            // Second word lives 4 bytes after the first.
            const Node op_b =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);

            // Stage both reads in temporaries before writing the destination pair,
            // so gpr0 is not clobbered while op_b may still depend on it.
            SetTemporal(bb, 0, op_a);
            SetTemporal(bb, 1, op_b);
            SetRegister(bb, instr.gpr0, GetTemporal(0));
            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
        }
        break;
    }
    case OpCode::Id::LD_L: {
        // Local memory load of 1, 2 or 4 words into gpr0...
        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}",
                  static_cast<u64>(instr.ld_l.unknown.Value()));

        // Reads local memory at gpr8 + smem_imm + offset (offset must be word aligned).
        const auto GetLmem = [&](s32 offset) {
            ASSERT(offset % 4 == 0);
            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
                                           immediate_offset);
            return GetLocalMemory(address);
        };

        switch (instr.ldst_sl.type.Value()) {
        case Tegra::Shader::StoreType::Bits32:
        case Tegra::Shader::StoreType::Bits64:
        case Tegra::Shader::StoreType::Bits128: {
            // Map store width to word count.
            const u32 count = [&]() {
                switch (instr.ldst_sl.type.Value()) {
                case Tegra::Shader::StoreType::Bits32:
                    return 1;
                case Tegra::Shader::StoreType::Bits64:
                    return 2;
                case Tegra::Shader::StoreType::Bits128:
                    return 4;
                default:
                    UNREACHABLE();
                    return 0;
                }
            }();
            // Read everything into temporaries first, then commit to registers,
            // so overlapping source/destination registers cannot interfere.
            for (u32 i = 0; i < count; ++i)
                SetTemporal(bb, i, GetLmem(i * 4));
            for (u32 i = 0; i < count; ++i)
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
                              static_cast<u32>(instr.ldst_sl.type.Value()));
        }
        break;
    }
    case OpCode::Id::LDG: {
        // Global memory load: base address is tracked back to a constant buffer entry.
        const auto [real_address_base, base_address, descriptor] =
            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
                                    static_cast<u32>(instr.ldg.immediate_offset.Value()), false);

        const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address =
                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));

            // Stage in temporaries, then commit (see LD_L rationale).
            SetTemporal(bb, i, gmem);
        }
        for (u32 i = 0; i < count; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
        }
        break;
    }
    case OpCode::Id::STG: {
        // Global memory store; address tracking as in LDG, marked as a write.
        const auto [real_address_base, base_address, descriptor] =
            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
                                    static_cast<u32>(instr.stg.immediate_offset.Value()), true);

        // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
        SetTemporal(bb, 0, real_address_base);

        const u32 count = GetUniformTypeElementsCount(instr.stg.type);
        for (u32 i = 0; i < count; ++i) {
            SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
        }
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address =
                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));

            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
        }
        break;
    }
    case OpCode::Id::ST_A: {
        // Store gpr0... into consecutive output attribute elements.
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

        // Mutable cursor over (attribute index, vec4 element), mirroring LD_A.
        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto StoreNextElement = [&](u32 reg_offset) {
            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
                                                 next_element, GetRegister(instr.gpr39));
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);

            bb.push_back(Operation(OperationCode::Assign, dest, src));

            // Load the next attribute element into the following register. If the element
            // to load goes beyond the vec4 size, load the first element of the next
            // attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };

        // The size field encodes (number of words - 1).
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            StoreNextElement(reg_offset);
        }

        break;
    }
    case OpCode::Id::ST_L: {
        // Local memory store of 1, 2 or 4 words from gpr0...
        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}",
                  static_cast<u64>(instr.st_l.cache_management.Value()));

        // Builds the local memory address gpr8 + smem_imm + offset (word aligned).
        const auto GetLmemAddr = [&](s32 offset) {
            ASSERT(offset % 4 == 0);
            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
        };

        switch (instr.ldst_sl.type.Value()) {
        case Tegra::Shader::StoreType::Bits128:
            SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
            SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
            // Intentional fallthrough: wider stores also write the lower words.
        case Tegra::Shader::StoreType::Bits64:
            SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
            // Intentional fallthrough.
        case Tegra::Shader::StoreType::Bits32:
            SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
            break;
        default:
            UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
                              static_cast<u32>(instr.ldst_sl.type.Value()));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|
|
|
|
|
2019-02-07 04:05:41 +01:00
|
|
|
/// Resolves a global memory access: tracks the address register back to the
/// constant buffer entry holding the base address, records the region's usage
/// (read or written) and builds the effective address node.
/// @param bb               Basic block that receives a human-readable comment node.
/// @param addr_register    Register node holding the base address.
/// @param immediate_offset Byte offset added to the base address.
/// @param is_write         True for stores (STG), false for loads (LDG).
/// @return Tuple of {effective address node, tracked cbuf node, memory descriptor}.
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
                                                                           Node addr_register,
                                                                           u32 immediate_offset,
                                                                           bool is_write) {
    // Walk the already-decoded code backwards to find which c[index][offset]
    // the address register was loaded from; both must be statically known.
    const Node base_address{
        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
    const auto cbuf = std::get_if<CbufNode>(base_address);
    ASSERT(cbuf != nullptr);
    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
    ASSERT(cbuf_offset_imm != nullptr);
    const auto cbuf_offset = cbuf_offset_imm->GetValue();

    bb.push_back(
        Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));

    const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
    // Default-constructs the usage entry on first access; this replaces the
    // previous try_emplace call whose `is_new` result was never used.
    auto& usage = used_global_memory[descriptor];
    if (is_write) {
        usage.is_written = true;
    } else {
        usage.is_read = true;
    }

    const auto real_address =
        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);

    return {real_address, base_address, descriptor};
}
|
|
|
|
|
2019-02-03 22:07:20 +01:00
|
|
|
} // namespace VideoCommon::Shader
|