shader/memory: Implement RED.E.ADD
Implements a reduction operation: an atomic operation that doesn't return a value. This commit introduces separate Reduce* primitives instead of reusing the existing Atomic* ones, because some shading languages might have a dedicated primitive for reduction operations.
This commit is contained in:
parent
fd0a2b5151
commit
3185245845
5 changed files with 98 additions and 27 deletions
|
@ -988,6 +988,12 @@ union Instruction {
|
||||||
BitField<46, 2, u64> cache_mode;
|
BitField<46, 2, u64> cache_mode;
|
||||||
} stg;
|
} stg;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<23, 3, AtomicOp> operation;
|
||||||
|
BitField<48, 1, u64> extended;
|
||||||
|
BitField<20, 3, GlobalAtomicType> type;
|
||||||
|
} red;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<52, 4, AtomicOp> operation;
|
BitField<52, 4, AtomicOp> operation;
|
||||||
BitField<49, 3, GlobalAtomicType> type;
|
BitField<49, 3, GlobalAtomicType> type;
|
||||||
|
@ -1733,6 +1739,7 @@ public:
|
||||||
ST_S,
|
ST_S,
|
||||||
ST, // Store in generic memory
|
ST, // Store in generic memory
|
||||||
STG, // Store in global memory
|
STG, // Store in global memory
|
||||||
|
RED, // Reduction operation
|
||||||
ATOM, // Atomic operation on global memory
|
ATOM, // Atomic operation on global memory
|
||||||
ATOMS, // Atomic operation on shared memory
|
ATOMS, // Atomic operation on shared memory
|
||||||
AL2P, // Transforms attribute memory into physical memory
|
AL2P, // Transforms attribute memory into physical memory
|
||||||
|
@ -2039,6 +2046,7 @@ private:
|
||||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||||
|
INST("1110101111111---", Id::RED, Type::Memory, "RED"),
|
||||||
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
|
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
|
||||||
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
||||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||||
|
|
|
@ -2119,8 +2119,14 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
|
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
|
||||||
Visit(operation[1]).As(type)),
|
Visit(operation[1]).AsUint()),
|
||||||
type};
|
Type::Uint};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <const std::string_view& opname, Type type>
|
||||||
|
Expression Reduce(Operation operation) {
|
||||||
|
code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Expression Branch(Operation operation) {
|
Expression Branch(Operation operation) {
|
||||||
|
@ -2479,6 +2485,20 @@ private:
|
||||||
&GLSLDecompiler::Atomic<Func::Or, Type::Int>,
|
&GLSLDecompiler::Atomic<Func::Or, Type::Int>,
|
||||||
&GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
|
&GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
|
||||||
|
|
||||||
|
&GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::And, Type::Uint>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
|
||||||
|
|
||||||
|
&GLSLDecompiler::Reduce<Func::Add, Type::Int>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Min, Type::Int>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Max, Type::Int>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::And, Type::Int>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Or, Type::Int>,
|
||||||
|
&GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
|
||||||
|
|
||||||
&GLSLDecompiler::Branch,
|
&GLSLDecompiler::Branch,
|
||||||
&GLSLDecompiler::BranchIndirect,
|
&GLSLDecompiler::BranchIndirect,
|
||||||
&GLSLDecompiler::PushFlowStack,
|
&GLSLDecompiler::PushFlowStack,
|
||||||
|
|
|
@ -1941,11 +1941,8 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type,
|
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||||
Type value_type = result_type>
|
|
||||||
Expression Atomic(Operation operation) {
|
Expression Atomic(Operation operation) {
|
||||||
const Id type_def = GetTypeDefinition(result_type);
|
|
||||||
|
|
||||||
Id pointer;
|
Id pointer;
|
||||||
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||||
pointer = GetSharedMemoryPointer(*smem);
|
pointer = GetSharedMemoryPointer(*smem);
|
||||||
|
@ -1953,15 +1950,19 @@ private:
|
||||||
pointer = GetGlobalMemoryPointer(*gmem);
|
pointer = GetGlobalMemoryPointer(*gmem);
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return {Constant(type_def, 0), result_type};
|
return {v_float_zero, Type::Float};
|
||||||
}
|
}
|
||||||
|
|
||||||
const Id value = As(Visit(operation[1]), value_type);
|
|
||||||
|
|
||||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||||
const Id semantics = Constant(type_def, 0);
|
const Id semantics = Constant(t_uint, 0);
|
||||||
|
const Id value = AsUint(Visit(operation[1]));
|
||||||
|
|
||||||
return {(this->*func)(type_def, pointer, scope, semantics, value), result_type};
|
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||||
|
Expression Reduce(Operation operation) {
|
||||||
|
Atomic<func>(operation);
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Expression Branch(Operation operation) {
|
Expression Branch(Operation operation) {
|
||||||
|
@ -2550,21 +2551,35 @@ private:
|
||||||
&SPIRVDecompiler::AtomicImageXor,
|
&SPIRVDecompiler::AtomicImageXor,
|
||||||
&SPIRVDecompiler::AtomicImageExchange,
|
&SPIRVDecompiler::AtomicImageExchange,
|
||||||
|
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
|
||||||
|
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
|
||||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>,
|
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
|
||||||
|
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
|
||||||
|
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
|
||||||
|
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
|
||||||
|
|
||||||
&SPIRVDecompiler::Branch,
|
&SPIRVDecompiler::Branch,
|
||||||
&SPIRVDecompiler::BranchIndirect,
|
&SPIRVDecompiler::BranchIndirect,
|
||||||
|
|
|
@ -378,13 +378,27 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
|
|
||||||
if (IsUnaligned(type)) {
|
if (IsUnaligned(type)) {
|
||||||
const u32 mask = GetUnalignedMask(type);
|
const u32 mask = GetUnalignedMask(type);
|
||||||
value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
|
value = InsertUnaligned(gmem, move(value), real_address, mask, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bb.push_back(Operation(OperationCode::Assign, gmem, value));
|
bb.push_back(Operation(OperationCode::Assign, gmem, value));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OpCode::Id::RED: {
|
||||||
|
UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
|
||||||
|
UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
|
||||||
|
const auto [real_address, base_address, descriptor] =
|
||||||
|
TrackGlobalMemory(bb, instr, true, true);
|
||||||
|
if (!real_address || !base_address) {
|
||||||
|
// Tracking failed, skip atomic.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||||
|
Node value = GetRegister(instr.gpr0);
|
||||||
|
bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OpCode::Id::ATOM: {
|
case OpCode::Id::ATOM: {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
|
UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
|
||||||
instr.atom.operation == AtomicOp::Dec ||
|
instr.atom.operation == AtomicOp::Dec ||
|
||||||
|
|
|
@ -178,6 +178,20 @@ enum class OperationCode {
|
||||||
AtomicIOr, /// (memory, int) -> int
|
AtomicIOr, /// (memory, int) -> int
|
||||||
AtomicIXor, /// (memory, int) -> int
|
AtomicIXor, /// (memory, int) -> int
|
||||||
|
|
||||||
|
ReduceUAdd, /// (memory, uint) -> void
|
||||||
|
ReduceUMin, /// (memory, uint) -> void
|
||||||
|
ReduceUMax, /// (memory, uint) -> void
|
||||||
|
ReduceUAnd, /// (memory, uint) -> void
|
||||||
|
ReduceUOr, /// (memory, uint) -> void
|
||||||
|
ReduceUXor, /// (memory, uint) -> void
|
||||||
|
|
||||||
|
ReduceIAdd, /// (memory, int) -> void
|
||||||
|
ReduceIMin, /// (memory, int) -> void
|
||||||
|
ReduceIMax, /// (memory, int) -> void
|
||||||
|
ReduceIAnd, /// (memory, int) -> void
|
||||||
|
ReduceIOr, /// (memory, int) -> void
|
||||||
|
ReduceIXor, /// (memory, int) -> void
|
||||||
|
|
||||||
Branch, /// (uint branch_target) -> void
|
Branch, /// (uint branch_target) -> void
|
||||||
BranchIndirect, /// (uint branch_target) -> void
|
BranchIndirect, /// (uint branch_target) -> void
|
||||||
PushFlowStack, /// (uint branch_target) -> void
|
PushFlowStack, /// (uint branch_target) -> void
|
||||||
|
|
Loading…
Reference in a new issue