From 36abf67e79b234a361b99a342391249095ccd79c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Jul 2019 21:03:53 -0300 Subject: [PATCH] shader/image: Implement SUATOM and fix SUST --- src/video_core/engines/shader_bytecode.h | 32 ++++ .../renderer_opengl/gl_shader_decompiler.cpp | 151 ++++++++++++++---- .../renderer_opengl/gl_shader_disk_cache.cpp | 20 ++- .../renderer_vulkan/vk_shader_decompiler.cpp | 42 +++++ src/video_core/shader/decode/image.cpp | 92 ++++++++--- src/video_core/shader/node.h | 57 +++++-- src/video_core/shader/shader_ir.h | 10 +- 7 files changed, 332 insertions(+), 72 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index bd8c1ada03..052e6d24ee 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -544,6 +544,28 @@ enum class VoteOperation : u64 { Eq = 2, // allThreadsEqualNV }; +enum class ImageAtomicSize : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32 = 3, + S64 = 5, + SD32 = 6, + SD64 = 7, +}; + +enum class ImageAtomicOperation : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -1391,6 +1413,14 @@ union Instruction { } } sust; + union { + BitField<28, 1, u64> is_ba; + BitField<51, 3, ImageAtomicSize> size; + BitField<33, 3, ImageType> image_type; + BitField<29, 4, ImageAtomicOperation> operation; + BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; + } suatom_d; + union { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; @@ -1543,6 +1573,7 @@ public: TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level SUST, // Surface Store + SUATOM, // Surface Atomic Operation EXIT, NOP, IPA, @@ -1826,6 +1857,7 @@ private: INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), INST("11101011001-----", Id::SUST, Type::Image, "SUST"), + INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6edb2ca383..137b23740c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -706,7 +706,7 @@ private: void DeclareImages() { const auto& images{ir.GetImages()}; for (const auto& [offset, image] : images) { - const std::string image_type = [&]() { + const char* image_type = [&] { switch (image.GetType()) { case Tegra::Shader::ImageType::Texture1D: return "image1D"; @@ -725,6 +725,23 @@ private: return "image1D"; } }(); + + const auto [type_prefix, format] = [&]() -> std::pair { + if (!image.IsSizeKnown()) { + return {"", ""}; + } + switch (image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {"u", "r32ui, "}; + case Tegra::Shader::ImageAtomicSize::S32: + return {"i", "r32i, "}; + default: + UNIMPLEMENTED_MSG("Unimplemented atomic size={}", + static_cast(image.GetSize())); + return {"", ""}; + } + }(); + std::string qualifier = "coherent volatile"; if (image.IsRead() && !image.IsWritten()) { qualifier += " readonly"; @@ -1180,6 +1197,74 @@ private: return expr; } + std::string BuildIntegerCoordinates(Operation operation) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const std::size_t coords_count{operation.GetOperandsCount()}; + std::string expr = constructors.at(coords_count - 1); + for (std::size_t i = 0; i < coords_count; ++i) { + expr += VisitOperand(operation, i).AsInt(); + if (i + 1 < coords_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + std::string BuildImageValues(Operation operation) { + const auto meta{std::get(operation.GetMeta())}; + const auto [constructors, type] = [&]() -> std::pair, Type> { + constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"}; + if (!meta.image.IsSizeKnown()) { + return {float_constructors, Type::Float}; + } + switch (meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint}; + case Tegra::Shader::ImageAtomicSize::S32: + return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint}; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", + static_cast(meta.image.GetSize())); + return {float_constructors, Type::Float}; + } + }(); + + const std::size_t values_count{meta.values.size()}; + std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); + for (std::size_t i = 0; i < values_count; ++i) { + expr += Visit(meta.values.at(i)).As(type); + if (i + 1 < values_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + Expression AtomicImage(Operation operation, const char* opname) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const auto meta{std::get(operation.GetMeta())}; + ASSERT(meta.values.size() == 1); + ASSERT(meta.image.IsSizeKnown()); + + const auto type = [&]() { + switch (const auto size = meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return Type::Uint; + case Tegra::Shader::ImageAtomicSize::S32: + return Type::Int; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast(size)); + return Type::Uint; + } + }(); + + return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image), + BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)), + type}; + } + Expression Assign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -1694,38 +1779,39 @@ private: } Expression ImageStore(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; const auto meta{std::get(operation.GetMeta())}; - - std::string expr = "imageStore("; - expr += GetImage(meta.image); - expr += ", "; - - const std::size_t coords_count{operation.GetOperandsCount()}; - expr += constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += "), "; - - const std::size_t values_count{meta.values.size()}; - UNIMPLEMENTED_IF(values_count != 4); - expr += "vec4("; - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsFloat(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += "));"; - - code.AddLine(expr); + code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), + BuildIntegerCoordinates(operation), BuildImageValues(operation)); return {}; } + Expression AtomicImageAdd(Operation operation) { + return AtomicImage(operation, "imageAtomicAdd"); + } + + Expression AtomicImageMin(Operation operation) { + return AtomicImage(operation, "imageAtomicMin"); + } + + Expression AtomicImageMax(Operation operation) { + return AtomicImage(operation, "imageAtomicMax"); + } + Expression AtomicImageAnd(Operation operation) { + return AtomicImage(operation, "imageAtomicAnd"); + } + + Expression AtomicImageOr(Operation operation) { + return AtomicImage(operation, "imageAtomicOr"); + } + + Expression AtomicImageXor(Operation operation) { + return AtomicImage(operation, "imageAtomicXor"); + } + + Expression AtomicImageExchange(Operation operation) { + return AtomicImage(operation, "imageAtomicExchange"); + } + Expression Branch(Operation operation) { const auto target = std::get_if(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -2019,6 +2105,13 @@ private: &GLSLDecompiler::TexelFetch, &GLSLDecompiler::ImageStore, + &GLSLDecompiler::AtomicImageAdd, + &GLSLDecompiler::AtomicImageMin, + &GLSLDecompiler::AtomicImageMax, + &GLSLDecompiler::AtomicImageAnd, + &GLSLDecompiler::AtomicImageOr, + &GLSLDecompiler::AtomicImageXor, + &GLSLDecompiler::AtomicImageExchange, &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 5450feedf4..f141c4e3b1 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -341,16 +341,22 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn u64 index{}; u32 type{}; u8 is_bindless{}; - u8 is_read{}; u8 is_written{}; + u8 is_read{}; + u8 is_size_known{}; + u32 size{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || - !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { + !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || + !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) { return {}; } - entry.entries.images.emplace_back(static_cast(offset), static_cast(index), - static_cast(type), - is_bindless != 0, is_written != 0, is_read != 0); + entry.entries.images.emplace_back( + static_cast(offset), static_cast(index), + static_cast(type), is_bindless != 0, is_written != 0, + is_read != 0, + is_size_known ? std::make_optional(static_cast(size)) + : std::nullopt); } u32 global_memory_count{}; @@ -429,12 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: return false; } for (const auto& image : entries.images) { + const u32 size = image.IsSizeKnown() ? static_cast(image.GetSize()) : 0U; if (!SaveObjectToPrecompiled(static_cast(image.GetOffset())) || !SaveObjectToPrecompiled(static_cast(image.GetIndex())) || !SaveObjectToPrecompiled(static_cast(image.GetType())) || !SaveObjectToPrecompiled(static_cast(image.IsBindless() ? 1 : 0)) || + !SaveObjectToPrecompiled(static_cast(image.IsWritten() ? 1 : 0)) || !SaveObjectToPrecompiled(static_cast(image.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast(image.IsWritten() ? 1 : 0))) { + !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) { return false; } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a35b45c9c8..7675fc7b39 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -944,6 +944,41 @@ private: return {}; } + Id AtomicImageAdd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageMin(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageMax(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageAnd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageOr(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageXor(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageExchange(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id Branch(Operation operation) { const auto target = std::get_if(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1366,6 +1401,13 @@ private: &SPIRVDecompiler::TexelFetch, &SPIRVDecompiler::ImageStore, + &SPIRVDecompiler::AtomicImageAdd, + &SPIRVDecompiler::AtomicImageMin, + &SPIRVDecompiler::AtomicImageMax, + &SPIRVDecompiler::AtomicImageAnd, + &SPIRVDecompiler::AtomicImageOr, + &SPIRVDecompiler::AtomicImageXor, + &SPIRVDecompiler::AtomicImageExchange, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 008109a990..d54fb88c99 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SUST: { UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store @@ -66,8 +65,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { image.MarkWrite(); MetaImage meta{image, values}; - const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; - bb.push_back(store); + bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); + break; + } + case OpCode::Id::SUATOM: { + UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); + + Node value = GetRegister(instr.gpr0); + + std::vector coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + const OperationCode operation_code = [instr] { + switch (instr.suatom_d.operation) { + case Tegra::Shader::ImageAtomicOperation::Add: + return OperationCode::AtomicImageAdd; + case Tegra::Shader::ImageAtomicOperation::Min: + return OperationCode::AtomicImageMin; + case Tegra::Shader::ImageAtomicOperation::Max: + return OperationCode::AtomicImageMax; + case Tegra::Shader::ImageAtomicOperation::And: + return OperationCode::AtomicImageAnd; + case Tegra::Shader::ImageAtomicOperation::Or: + return OperationCode::AtomicImageOr; + case Tegra::Shader::ImageAtomicOperation::Xor: + return OperationCode::AtomicImageXor; + case Tegra::Shader::ImageAtomicOperation::Exch: + return OperationCode::AtomicImageExchange; + default: + UNIMPLEMENTED_MSG("Unimplemented operation={}", + static_cast(instr.suatom_d.operation.Value())); + return OperationCode::AtomicImageAdd; + } + }(); + + const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; + MetaImage meta{image, {std::move(value)}}; + SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); break; } default: @@ -77,38 +114,51 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { return pc; } -Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset{static_cast(image.index.Value())}; - - // If this image has already been used, return the existing mapping. - const auto it = used_images.find(offset); - if (it != used_images.end()) { - ASSERT(it->second.GetType() == type); - return it->second; +Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional size) { + const auto offset{static_cast(image.index.Value())}; + if (const auto image = TryUseExistingImage(offset, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - return used_images.emplace(offset, Image{offset, next_index, type}).first->second; + return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; } -Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { +Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional size) { const Node image_register{GetRegister(reg)}; const auto [base_image, cbuf_index, cbuf_offset]{ TrackCbuf(image_register, global_code, static_cast(global_code.size()))}; const auto cbuf_key{(static_cast(cbuf_index) << 32) | static_cast(cbuf_offset)}; - // If this image has already been used, return the existing mapping. - const auto it = used_images.find(cbuf_key); - if (it != used_images.end()) { - ASSERT(it->second.GetType() == type); - return it->second; + if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) + return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) .first->second; } +Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional size) { + auto it = used_images.find(offset); + if (it == used_images.end()) { + return nullptr; + } + auto& image = it->second; + ASSERT(image.GetType() == type); + + if (size) { + // We know the size, if it's known it has to be the same as before, otherwise we can set it. + if (image.IsSizeKnown()) { + ASSERT(image.GetSize() == size); + } else { + image.SetSize(*size); + } + } + return ℑ +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index b29aedce8d..b47b201cf0 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,14 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 - ImageStore, /// (MetaImage, float[N] coords) -> void + ImageStore, /// (MetaImage, int[N] values) -> void + AtomicImageAdd, /// (MetaImage, int[N] coords) -> void + AtomicImageMin, /// (MetaImage, int[N] coords) -> void + AtomicImageMax, /// (MetaImage, int[N] coords) -> void + AtomicImageAnd, /// (MetaImage, int[N] coords) -> void + AtomicImageOr, /// (MetaImage, int[N] coords) -> void + AtomicImageXor, /// (MetaImage, int[N] coords) -> void + AtomicImageExchange, /// (MetaImage, int[N] coords) -> void Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void @@ -275,25 +283,32 @@ private: class Image final { public: - constexpr explicit Image(u64 offset, std::size_t index, Tegra::Shader::ImageType type) - : offset{offset}, index{index}, type{type}, is_bindless{false} {} + constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + std::optional size) + : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {} constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::ImageType type) + Tegra::Shader::ImageType type, + std::optional size) : offset{(static_cast(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_bindless{true} {} + is_bindless{true}, size{size} {} constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, - bool is_bindless, bool is_written, bool is_read) + bool is_bindless, bool is_written, bool is_read, + std::optional size) : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, - is_written{is_written}, is_read{is_read} {} + is_written{is_written}, is_read{is_read}, size{size} {} + + void MarkWrite() { + is_written = true; + } void MarkRead() { is_read = true; } - void MarkWrite() { - is_written = true; + void SetSize(Tegra::Shader::ImageAtomicSize size_) { + size = size_; } constexpr std::size_t GetOffset() const { @@ -312,25 +327,39 @@ public: return is_bindless; } - constexpr bool IsRead() const { - return is_read; - } - constexpr bool IsWritten() const { return is_written; } + constexpr bool IsRead() const { + return is_read; + } + constexpr std::pair GetBindlessCBuf() const { return {static_cast(offset >> 32), static_cast(offset)}; } + constexpr bool IsSizeKnown() const { + return size.has_value(); + } + + constexpr Tegra::Shader::ImageAtomicSize GetSize() const { + return size.value(); + } + + constexpr bool operator<(const Image& rhs) const { + return std::tie(offset, index, type, size, is_bindless) < + std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless); + } + private: u64 offset{}; std::size_t index{}; Tegra::Shader::ImageType type{}; bool is_bindless{}; - bool is_read{}; bool is_written{}; + bool is_read{}; + std::optional size{}; }; struct GlobalMemoryBase { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0f891eace6..62816bd56e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -272,10 +272,16 @@ private: bool is_shadow); /// Accesses an image. - Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional size = {}); /// Access a bindless image sampler. - Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional size = {}); + + /// Tries to access an existing image, updating it's state as needed + Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional size); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits);