forked from suyu/suyu
shader/memory: Implement ATOMS.ADD.U32
This commit is contained in:
parent
30faf6a964
commit
63ba41a26d
5 changed files with 74 additions and 3 deletions
|
@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
|
|||
Trunc = 11,
|
||||
};
|
||||
|
||||
/// Operation selector for shader atomic instructions.
///
/// The enumerators are read directly out of an instruction bit field
/// (BitField<52, 4, AtomicOp> in the `atoms` encoding), so their numeric
/// values are part of the encoding and must not be renumbered.
enum class AtomicOp : u64 {
    Add = 0,
    Min = 1,
    Max = 2,
    Inc = 3,
    Dec = 4,
    And = 5,
    Or = 6,
    Xor = 7,
    Exch = 8,
};
|
||||
|
||||
enum class UniformType : u64 {
|
||||
UnsignedByte = 0,
|
||||
SignedByte = 1,
|
||||
|
@ -236,6 +248,13 @@ enum class StoreType : u64 {
|
|||
Bits128 = 6,
|
||||
};
|
||||
|
||||
/// Operand type selector for shader atomic instructions.
///
/// The enumerators are read directly out of an instruction bit field
/// (BitField<28, 2, AtomicType> in the `atoms` encoding), so their numeric
/// values are part of the encoding and must not be renumbered.
enum class AtomicType : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
    S64 = 3,
};
|
||||
|
||||
enum class IMinMaxExchange : u64 {
|
||||
None = 0,
|
||||
XLo = 1,
|
||||
|
@ -938,6 +957,16 @@ union Instruction {
|
|||
BitField<46, 2, u64> cache_mode;
|
||||
} stg;
|
||||
|
||||
union {
|
||||
BitField<52, 4, AtomicOp> operation;
|
||||
BitField<28, 2, AtomicType> type;
|
||||
BitField<30, 22, s64> offset;
|
||||
|
||||
s32 GetImmediateOffset() const {
|
||||
return static_cast<s32>(offset << 2);
|
||||
}
|
||||
} atoms;
|
||||
|
||||
union {
|
||||
BitField<32, 1, PhysicalAttributeDirection> direction;
|
||||
BitField<47, 3, AttributeSize> size;
|
||||
|
@ -1659,9 +1688,10 @@ public:
|
|||
ST_A,
|
||||
ST_L,
|
||||
ST_S,
|
||||
ST, // Store in generic memory
|
||||
STG, // Store in global memory
|
||||
AL2P, // Transforms attribute memory into physical memory
|
||||
ST, // Store in generic memory
|
||||
STG, // Store in global memory
|
||||
ATOMS, // Atomic operation on shared memory
|
||||
AL2P, // Transforms attribute memory into physical memory
|
||||
TEX,
|
||||
TEX_B, // Texture Load Bindless
|
||||
TXQ, // Texture Query
|
||||
|
@ -1964,6 +1994,7 @@ private:
|
|||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
|
||||
|
|
|
@ -1856,6 +1856,16 @@ private:
|
|||
Type::Uint};
|
||||
}
|
||||
|
||||
template <const std::string_view& opname, Type type>
|
||||
Expression Atomic(Operation operation) {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
auto& smem = std::get<SmemNode>(*operation[0]);
|
||||
|
||||
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
|
||||
Visit(operation[1]).As(type)),
|
||||
type};
|
||||
}
|
||||
|
||||
Expression Branch(Operation operation) {
|
||||
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
|
||||
UNIMPLEMENTED_IF(!target);
|
||||
|
@ -2194,6 +2204,8 @@ private:
|
|||
&GLSLDecompiler::AtomicImage<Func::Xor>,
|
||||
&GLSLDecompiler::AtomicImage<Func::Exchange>,
|
||||
|
||||
&GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
|
||||
|
||||
&GLSLDecompiler::Branch,
|
||||
&GLSLDecompiler::BranchIndirect,
|
||||
&GLSLDecompiler::PushFlowStack,
|
||||
|
|
|
@ -1796,6 +1796,11 @@ private:
|
|||
return {};
|
||||
}
|
||||
|
||||
/// Handler for OperationCode::UAtomicAdd in the SPIR-V decompiler.
///
/// Not implemented yet: the operation argument is ignored, the missing
/// feature is flagged through UNIMPLEMENTED(), and a default-constructed
/// Expression is returned.
Expression UAtomicAdd(Operation) {
    UNIMPLEMENTED();
    return Expression{};
}
|
||||
|
||||
Expression Branch(Operation operation) {
|
||||
const auto& target = std::get<ImmediateNode>(*operation[0]);
|
||||
OpStore(jmp_to, Constant(t_uint, target.GetValue()));
|
||||
|
@ -2373,6 +2378,8 @@ private:
|
|||
&SPIRVDecompiler::AtomicImageXor,
|
||||
&SPIRVDecompiler::AtomicImageExchange,
|
||||
|
||||
&SPIRVDecompiler::UAtomicAdd,
|
||||
|
||||
&SPIRVDecompiler::Branch,
|
||||
&SPIRVDecompiler::BranchIndirect,
|
||||
&SPIRVDecompiler::PushFlowStack,
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::AtomicOp;
|
||||
using Tegra::Shader::AtomicType;
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOMS: {
    // Only ATOMS.ADD.U32 is decoded so far; any other operation or operand
    // type is flagged as unimplemented.
    UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
                         static_cast<int>(instr.atoms.operation.Value()));
    UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
                         static_cast<int>(instr.atoms.type.Value()));

    // Shared memory address: gpr8 plus the instruction's immediate offset.
    Node base = GetRegister(instr.gpr8);
    Node address = Operation(OperationCode::IAdd, std::move(base),
                             Immediate(instr.atoms.GetImmediateOffset()));
    Node target = GetSharedMemory(std::move(address));

    // gpr20 supplies the operand; the result of the atomic add is written to gpr0.
    Node operand = GetRegister(instr.gpr20);
    Node result = Operation(OperationCode::UAtomicAdd, std::move(target), std::move(operand));
    SetRegister(bb, instr.gpr0, std::move(result));
    break;
}
|
||||
case OpCode::Id::AL2P: {
|
||||
// Ignore al2p.direction since we don't care about it.
|
||||
|
||||
|
|
|
@ -162,6 +162,8 @@ enum class OperationCode {
|
|||
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
|
||||
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
|
||||
|
||||
UAtomicAdd, /// (smem, uint) -> uint
|
||||
|
||||
Branch, /// (uint branch_target) -> void
|
||||
BranchIndirect, /// (uint branch_target) -> void
|
||||
PushFlowStack, /// (uint branch_target) -> void
|
||||
|
|
Loading…
Reference in a new issue