1
0
Fork 0
forked from suyu/suyu

Merge pull request #2485 from ReinUsesLisp/generic-memory

shader/memory: Implement generic memory stores and loads (ST and LD)
This commit is contained in:
bunnei 2019-05-24 18:24:26 -04:00 committed by GitHub
commit 1a2d90ab09
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 74 additions and 36 deletions

View file

@ -529,6 +529,11 @@ union Instruction {
BitField<39, 8, Register> gpr39; BitField<39, 8, Register> gpr39;
BitField<48, 16, u64> opcode; BitField<48, 16, u64> opcode;
// Operand view shared by the memory instructions that go through
// TrackAndGetGlobalMemory: that helper reads `gpr` as the address register
// and `offset` (cast to u32) as the immediate offset.
// NOTE(review): BitField arguments are presumably <first bit, width, type> — confirm
// against the BitField definition.
union {
// Register holding the address operand.
BitField<8, 8, Register> gpr;
// Signed immediate offset.
BitField<20, 24, s64> offset;
} gmem;
union { union {
BitField<20, 16, u64> imm20_16; BitField<20, 16, u64> imm20_16;
BitField<20, 19, u64> imm20_19; BitField<20, 19, u64> imm20_19;
@ -812,13 +817,11 @@ union Instruction {
union { union {
BitField<48, 3, UniformType> type; BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode; BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} ldg; } ldg;
union { union {
BitField<48, 3, UniformType> type; BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode; BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} stg; } stg;
union { union {
@ -827,6 +830,11 @@ union Instruction {
BitField<20, 11, u64> address; BitField<20, 11, u64> address;
} al2p; } al2p;
// Fields read when decoding the generic memory instructions (LD / ST).
union {
// Access type; the decoder passes it to GetUniformTypeElementsCount to get
// the number of elements to load or store.
BitField<53, 3, UniformType> type;
// The LD/ST decoder reports the instruction as unimplemented when this
// bit is not set.
BitField<52, 1, u64> extended;
} generic;
union { union {
BitField<0, 3, u64> pred0; BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3; BitField<3, 3, u64> pred3;
@ -1387,10 +1395,12 @@ public:
LD_L, LD_L,
LD_S, LD_S,
LD_C, LD_C,
LD, // Load from generic memory
LDG, // Load from global memory
ST_A, ST_A,
ST_L, ST_L,
ST_S, ST_S,
LDG, // Load from global memory ST, // Store in generic memory
STG, // Store in global memory STG, // Store in global memory
AL2P, // Transforms attribute memory into physical memory AL2P, // Transforms attribute memory into physical memory
TEX, TEX,
@ -1658,10 +1668,12 @@ private:
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("100-------------", Id::LD, Type::Memory, "LD"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"),

View file

@ -146,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
} }
break; break;
} }
case OpCode::Id::LD:
case OpCode::Id::LDG: { case OpCode::Id::LDG: {
const auto [real_address_base, base_address, descriptor] = const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), switch (opcode->get().GetId()) {
static_cast<u32>(instr.ldg.immediate_offset.Value()), false); case OpCode::Id::LD:
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
return instr.generic.type;
case OpCode::Id::LDG:
return instr.ldg.type;
default:
UNREACHABLE();
return {};
}
}();
const u32 count = GetUniformTypeElementsCount(instr.ldg.type); const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, instr, false);
const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) { for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4); const Node it_offset = Immediate(i * 4);
const Node real_address = const Node real_address =
@ -165,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
} }
break; break;
} }
// Store to global memory: resolves the destination from gpr8 plus the STG
// immediate offset, then emits one assignment per element of the STG type.
case OpCode::Id::STG: {
// `true` marks the access as a write (is_write parameter).
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
static_cast<u32>(instr.stg.immediate_offset.Value()), true);
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
SetTemporal(bb, 0, real_address_base);
const u32 count = GetUniformTypeElementsCount(instr.stg.type);
// Copy every source register (gpr0, gpr0 + 1, ...) into temporals 1..count
// before any store is emitted.
for (u32 i = 0; i < count; ++i) {
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
// Emit the stores: element i goes to real_address_base + i * 4.
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
}
break;
}
case OpCode::Id::ST_A: { case OpCode::Id::ST_A: {
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported"); "Indirect attribute loads are not supported");
@ -242,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
} }
break; break;
} }
// Generic (ST) and global (STG) stores share one implementation; they differ
// only in which instruction fields carry the access type.
case OpCode::Id::ST:
case OpCode::Id::STG: {
// Pick the UniformType from the encoding that matches the decoded opcode.
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
switch (opcode->get().GetId()) {
case OpCode::Id::ST:
// Only the "extended" form of ST is handled; anything else is reported.
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
return instr.generic.type;
case OpCode::Id::STG:
return instr.stg.type;
default:
// The outer case labels only admit ST and STG.
UNREACHABLE();
return {};
}
}();
// `true` marks the access as a write (is_write parameter); the helper reads
// the address register and immediate offset from instr.gmem itself.
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, instr, true);
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
SetTemporal(bb, 0, real_address_base);
const u32 count = GetUniformTypeElementsCount(type);
// Copy every source register (gpr0, gpr0 + 1, ...) into temporals 1..count
// before any store is emitted.
for (u32 i = 0; i < count; ++i) {
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
// Emit the stores: element i goes to real_address_base + i * 4.
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
}
break;
}
case OpCode::Id::AL2P: { case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it. // Ignore al2p.direction since we don't care about it.
@ -265,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
} }
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
Node addr_register, Instruction instr,
u32 immediate_offset,
bool is_write) { bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
const Node base_address{ const Node base_address{
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf = std::get_if<CbufNode>(base_address); const auto cbuf = std::get_if<CbufNode>(base_address);

View file

@ -818,10 +818,8 @@ private:
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) const; s64 cursor) const;
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
Node addr_register, NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
u32 immediate_offset,
bool is_write);
template <typename... T> template <typename... T>
Node Operation(OperationCode code, const T*... operands) { Node Operation(OperationCode code, const T*... operands) {