1
0
Fork 0
forked from suyu/suyu

Merge pull request #2520 from ReinUsesLisp/vulkan-refresh

vk_device,vk_shader_decompiler: Miscellaneous changes
This commit is contained in:
bunnei 2019-06-05 18:10:00 -04:00 committed by GitHub
commit a20ba09bfd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 219 additions and 89 deletions

@ -1 +1 @@
Subproject commit 15e5c4db7500b936ae758236f2e72fc1aec22020 Subproject commit d05c8df88da98ec1ab3bc600d7f5783b4060895b

View file

@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
} // namespace Alternatives } // namespace Alternatives
@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface); SetupFamilies(dldi, surface);
SetupProperties(dldi); SetupProperties(dldi);
SetupFeatures(dldi);
} }
VKDevice::~VKDevice() = default; VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
const auto queue_cis = GetDeviceQueueCreateInfos(); vk::PhysicalDeviceFeatures device_features;
vk::PhysicalDeviceFeatures device_features{}; device_features.vertexPipelineStoresAndAtomics = true;
device_features.independentBlend = true;
device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector<const char*> extensions = LoadExtensions(dldi);
const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
0, nullptr, static_cast<u32>(extensions.size()), 0, nullptr, static_cast<u32>(extensions.size()),
extensions.data(), &device_features); extensions.data(), &device_features);
@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
LOG_CRITICAL(Render_Vulkan, LOG_CRITICAL(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host " "Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it", "hardware does not support it",
static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), vk::to_string(wanted_format), vk::to_string(wanted_usage),
static_cast<u32>(format_type)); static_cast<u32>(format_type));
UNREACHABLE(); UNREACHABLE();
return wanted_format; return wanted_format;
@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
return wanted_format; return wanted_format;
} }
/// Checks whether the physical device can use the ASTC formats listed below with optimal tiling.
/// @param features Features reported by the physical device.
/// @param dldi Dispatch loader used to query per-format properties.
/// @returns True only when textureCompressionASTC_LDR is reported and every listed format
/// exposes all of the required optimal tiling features.
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
                                      const vk::DispatchLoaderDynamic& dldi) const {
    if (!features.textureCompressionASTC_LDR) {
        return false;
    }
    // Usages each ASTC format has to support with optimal tiling.
    const auto required_features{
        vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
        vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
        vk::FormatFeatureFlagBits::eTransferDst};
    constexpr std::array<vk::Format, 9> astc_formats = {
        vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
        vk::Format::eAstc8x8SrgbBlock,  vk::Format::eAstc8x6SrgbBlock,
        vk::Format::eAstc5x4SrgbBlock,  vk::Format::eAstc5x5UnormBlock,
        vk::Format::eAstc5x5SrgbBlock,  vk::Format::eAstc10x8UnormBlock,
        vk::Format::eAstc10x8SrgbBlock};
    for (const auto format : astc_formats) {
        // Named "properties" so it does not shadow the format_properties member.
        const auto properties{physical.getFormatProperties(format, dldi)};
        // All required bits must be present. Testing the masked value for non-zero would accept
        // a format that supports just one of the usages.
        if ((properties.optimalTilingFeatures & required_features) != required_features) {
            return false;
        }
    }
    return true;
}
bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const { FormatType format_type) const {
const auto it = format_properties.find(wanted_format); const auto it = format_properties.find(wanted_format);
@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) { vk::SurfaceKHR surface) {
const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
bool has_swapchain{}; bool has_swapchain{};
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
has_swapchain |= prop.extensionName == swapchain_extension; has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
} }
if (!has_swapchain) { if (!has_swapchain) {
// The device doesn't support creating swapchains. // The device doesn't support creating swapchains.
@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
} }
// TODO(Rodrigo): Check if the device matches all requirements. // TODO(Rodrigo): Check if the device matches all requirements.
const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); const auto properties{physical.getProperties(dldi)};
if (props.limits.maxUniformBufferRange < 65536) { const auto limits{properties.limits};
if (limits.maxUniformBufferRange < 65536) {
return false;
}
const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
return false; return false;
} }
@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
return true; return true;
} }
/// Collects the names of the device extensions to enable.
/// The swapchain extension is always requested. Optional extensions are appended only when the
/// physical device enumerates them, and their availability flag on this object is set to true.
/// @param dldi Dispatch loader used to enumerate the device extensions.
/// @returns Extension name pointers to enable.
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
    std::vector<const char*> enabled;
    enabled.reserve(2);
    enabled.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);

    // Appends `name` and raises `status` (when one is given) if `available` describes the
    // extension called `name`. `revision` is accepted but not consulted.
    const auto EnableIfMatches = [&enabled](const vk::ExtensionProperties& available,
                                            std::optional<std::reference_wrapper<bool>> status,
                                            const char* name, u32 revision) {
        if (available.extensionName == std::string(name)) {
            enabled.push_back(name);
            if (status.has_value()) {
                status->get() = true;
            }
        }
    };

    const auto available_extensions = physical.enumerateDeviceExtensionProperties(nullptr, dldi);
    for (const auto& available : available_extensions) {
        EnableIfMatches(available, ext_scalar_block_layout,
                        VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
    }
    return enabled;
}
void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
std::optional<u32> graphics_family_, present_family_; std::optional<u32> graphics_family_, present_family_;
@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
device_type = props.deviceType; device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
}
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
} }
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static const float QUEUE_PRIORITY = 1.f; static const float QUEUE_PRIORITY = 1.0f;
std::set<u32> unique_queue_families = {graphics_family, present_family}; std::set<u32> unique_queue_families = {graphics_family, present_family};
std::vector<vk::DeviceQueueCreateInfo> queue_cis; std::vector<vk::DeviceQueueCreateInfo> queue_cis;
@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eB5G6R5UnormPack16,
vk::Format::eA2B10G10R10UnormPack32,
vk::Format::eR32G32B32A32Sfloat,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,
vk::Format::eR8G8B8A8Srgb,
vk::Format::eR8Unorm,
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eD32Sfloat,
vk::Format::eD16Unorm,
vk::Format::eD16UnormS8Uint,
vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint,
vk::Format::eBc1RgbaUnormBlock,
vk::Format::eBc2UnormBlock,
vk::Format::eBc3UnormBlock,
vk::Format::eBc4UnormBlock,
vk::Format::eBc5UnormBlock,
vk::Format::eBc5SnormBlock,
vk::Format::eBc7UnormBlock,
vk::Format::eAstc4x4UnormBlock,
vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock};
std::map<vk::Format, vk::FormatProperties> format_properties; std::map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi)); format_properties.emplace(format, physical.getFormatProperties(format, dldi));
}; }
AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
AddFormatQuery(vk::Format::eR8Unorm);
AddFormatQuery(vk::Format::eD32Sfloat);
AddFormatQuery(vk::Format::eD16Unorm);
AddFormatQuery(vk::Format::eD16UnormS8Uint);
AddFormatQuery(vk::Format::eD24UnormS8Uint);
AddFormatQuery(vk::Format::eD32SfloatS8Uint);
AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
AddFormatQuery(vk::Format::eBc2UnormBlock);
AddFormatQuery(vk::Format::eBc3UnormBlock);
AddFormatQuery(vk::Format::eBc4UnormBlock);
return format_properties; return format_properties;
} }

View file

@ -11,7 +11,7 @@
namespace Vulkan { namespace Vulkan {
/// Format usage descriptor /// Format usage descriptor.
enum class FormatType { Linear, Optimal, Buffer }; enum class FormatType { Linear, Optimal, Buffer };
/// Handles data specific to a physical device. /// Handles data specific to a physical device.
@ -34,12 +34,12 @@ public:
vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const; FormatType format_type) const;
/// Returns the dispatch loader with direct function pointers of the device /// Returns the dispatch loader with direct function pointers of the device.
const vk::DispatchLoaderDynamic& GetDispatchLoader() const { const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
return dld; return dld;
} }
/// Returns the logical device /// Returns the logical device.
vk::Device GetLogical() const { vk::Device GetLogical() const {
return logical.get(); return logical.get();
} }
@ -69,30 +69,55 @@ public:
return present_family; return present_family;
} }
/// Returns if the device is integrated with the host CPU /// Returns if the device is integrated with the host CPU.
bool IsIntegrated() const { bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu; return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
} }
/// Returns uniform buffer alignment requirement /// Returns uniform buffer alignment requirement.
u64 GetUniformBufferAlignment() const { u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment; return uniform_buffer_alignment;
} }
/// Returns the maximum range for storage buffers.
/// The value is the physical device's maxStorageBufferRange limit, cached by SetupProperties.
u64 GetMaxStorageBufferRange() const {
return max_storage_buffer_range;
}
/// Returns true if ASTC is natively supported.
/// This is the cached result computed by SetupFeatures; no device query happens here.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
}
/// Returns true if the device supports VK_EXT_scalar_block_layout.
/// The flag starts as false and is only raised by LoadExtensions (called from Create), so it
/// reads false before the logical device has been created.
bool IsExtScalarBlockLayoutSupported() const {
return ext_scalar_block_layout;
}
/// Checks if the physical device is suitable. /// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface); vk::SurfaceKHR surface);
private: private:
/// Loads extensions into a vector and stores available ones in this object.
std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
/// Sets up queue families. /// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
/// Sets up device properties. /// Sets up device properties.
void SetupProperties(const vk::DispatchLoaderDynamic& dldi); void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
/// Sets up device features.
void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
/// Returns a list of queue initialization descriptors. /// Returns a list of queue initialization descriptors.
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if ASTC textures are natively supported.
bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const;
/// Returns true if a format is supported. /// Returns true if a format is supported.
bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const; FormatType format_type) const;
@ -101,16 +126,19 @@ private:
static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
const vk::PhysicalDevice physical; ///< Physical device const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers vk::DispatchLoaderDynamic dld; ///< Device function pointers.
UniqueDevice logical; ///< Logical device UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue vk::Queue present_queue; ///< Main present queue.
u32 graphics_family{}; ///< Main graphics queue family index u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index u32 present_family{}; ///< Main present queue family index.
vk::PhysicalDeviceType device_type; ///< Physical device type vk::PhysicalDeviceType device_type; ///< Physical device type.
u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requirement u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requirement.
std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary u64 max_storage_buffer_range{}; ///< Max storage buffer size.
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout.
std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -17,6 +17,7 @@
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h" #include "video_core/engines/shader_header.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/shader_ir.h" #include "video_core/shader/shader_ir.h"
@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&; using Operation = const OperationNode&;
// TODO(Rodrigo): Use rasterizer's value // TODO(Rodrigo): Use rasterizer's value
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4;
constexpr u32 STAGE_BINDING_STRIDE = 0x100; constexpr u32 STAGE_BINDING_STRIDE = 0x100;
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler : public Sirit::Module { class SPIRVDecompiler : public Sirit::Module {
public: public:
explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage)
: Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} {
AddCapability(spv::Capability::Shader); AddCapability(spv::Capability::Shader);
AddExtension("SPV_KHR_storage_buffer_storage_class"); AddExtension("SPV_KHR_storage_buffer_storage_class");
AddExtension("SPV_KHR_variable_pointers"); AddExtension("SPV_KHR_variable_pointers");
@ -195,7 +197,9 @@ public:
entries.samplers.emplace_back(sampler); entries.samplers.emplace_back(sampler);
} }
for (const auto& attribute : ir.GetInputAttributes()) { for (const auto& attribute : ir.GetInputAttributes()) {
entries.attributes.insert(GetGenericAttributeLocation(attribute)); if (IsGenericAttribute(attribute)) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
} }
entries.clip_distances = ir.GetClipDistances(); entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength(); entries.shader_length = ir.GetLength();
@ -210,7 +214,6 @@ private:
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
static constexpr u32 CBUF_STRIDE = 16;
void AllocateBindings() { void AllocateBindings() {
const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
@ -315,6 +318,7 @@ private:
constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
"overflow"}; "overflow"};
for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
const auto flag_code = static_cast<InternalFlag>(flag);
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
} }
@ -374,7 +378,9 @@ private:
u32 binding = const_buffers_base_binding; u32 binding = const_buffers_base_binding;
for (const auto& entry : ir.GetConstantBuffers()) { for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry; const auto [index, size] = entry;
const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); const Id type =
device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
const Id id = OpVariable(type, spv::StorageClass::Uniform);
AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::Binding, binding++);
@ -569,33 +575,35 @@ private:
const Node offset = cbuf->GetOffset(); const Node offset = cbuf->GetOffset();
const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
Id buffer_index{}; Id pointer{};
Id buffer_element{}; if (device.IsExtScalarBlockLayoutSupported()) {
const Id buffer_offset = Emit(OpShiftRightLogical(
if (const auto immediate = std::get_if<ImmediateNode>(offset)) { t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
// Direct access pointer = Emit(
const u32 offset_imm = immediate->GetValue(); OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset));
ASSERT(offset_imm % 4 == 0);
buffer_index = Constant(t_uint, offset_imm / 16);
buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
// TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
// emits sub-optimal code on GLSL from my testing).
const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
const Id final_offset = Emit(
OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
} else { } else {
UNREACHABLE_MSG("Unmanaged offset node type"); Id buffer_index{};
Id buffer_element{};
if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT(offset_imm % 4 == 0);
buffer_index = Constant(t_uint, offset_imm / 16);
buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
const Id final_offset = Emit(OpUMod(
t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
} else {
UNREACHABLE_MSG("Unmanaged offset node type");
}
pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
buffer_index, buffer_element));
} }
const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
buffer_index, buffer_element));
return Emit(OpLoad(t_float, pointer)); return Emit(OpLoad(t_float, pointer));
} else if (const auto gmem = std::get_if<GmemNode>(node)) { } else if (const auto gmem = std::get_if<GmemNode>(node)) {
@ -612,7 +620,9 @@ private:
// It's invalid to call conditional on nested nodes, use an operation instead // It's invalid to call conditional on nested nodes, use an operation instead
const Id true_label = OpLabel(); const Id true_label = OpLabel();
const Id skip_label = OpLabel(); const Id skip_label = OpLabel();
Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); const Id condition = Visit(conditional->GetCondition());
Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone));
Emit(OpBranchConditional(condition, true_label, skip_label));
Emit(true_label); Emit(true_label);
VisitBasicBlock(conditional->GetCode()); VisitBasicBlock(conditional->GetCode());
@ -968,11 +978,11 @@ private:
case ShaderStage::Vertex: { case ShaderStage::Vertex: {
// TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
// seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
const Id position = AccessElement(t_float4, per_vertex, position_index); const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u);
Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); Id depth = Emit(OpLoad(t_float, z_pointer));
depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); Emit(OpStore(z_pointer, depth));
break; break;
} }
case ShaderStage::Fragment: { case ShaderStage::Fragment: {
@ -1311,6 +1321,7 @@ private:
&SPIRVDecompiler::WorkGroupId<2>, &SPIRVDecompiler::WorkGroupId<2>,
}; };
const VKDevice& device;
const ShaderIR& ir; const ShaderIR& ir;
const ShaderStage stage; const ShaderStage stage;
const Tegra::Shader::Header header; const Tegra::Shader::Header header;
@ -1349,12 +1360,18 @@ private:
const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
const Id t_cbuf_array = const Id t_cbuf_std140 = Decorate(
Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"),
spv::Decoration::ArrayStride, CBUF_STRIDE); spv::Decoration::ArrayStride, 16u);
const Id t_cbuf_struct = MemberDecorate( const Id t_cbuf_scalar = Decorate(
Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"),
const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); spv::Decoration::ArrayStride, 4u);
const Id t_cbuf_std140_struct = MemberDecorate(
Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_cbuf_scalar_struct = MemberDecorate(
Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
const Id t_gmem_array = const Id t_gmem_array =
@ -1403,8 +1420,9 @@ private:
std::map<u32, Id> labels; std::map<u32, Id> labels;
}; };
DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); Maxwell::ShaderStage stage) {
auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
decompiler->Decompile(); decompiler->Decompile();
return {std::move(decompiler), decompiler->GetShaderEntries()}; return {std::move(decompiler), decompiler->GetShaderEntries()};
} }

View file

@ -20,10 +20,13 @@ namespace VideoCommon::Shader {
class ShaderIR; class ShaderIR;
} }
namespace Vulkan {
class VKDevice;
}
namespace Vulkan::VKShader { namespace Vulkan::VKShader {
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::Sampler; using SamplerEntry = VideoCommon::Shader::Sampler;
constexpr u32 DESCRIPTOR_SET = 0; constexpr u32 DESCRIPTOR_SET = 0;
@ -75,6 +78,7 @@ struct ShaderEntries {
using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Maxwell::ShaderStage stage);
} // namespace Vulkan::VKShader } // namespace Vulkan::VKShader