author | liamwhite <liamwhite@users.noreply.github.com> | 2023-10-22 00:21:53 +0200
committer | GitHub <noreply@github.com> | 2023-10-22 00:21:53 +0200
commit | 4b06bcc82c63f5054072bce55c5381c9205e1088 (patch)
tree | 27832c1be672c53d6ec8892b948c80f0ced799d1
parent | Merge pull request #11748 from liamwhite/kern_1700 (diff)
parent | Manually robust on Maxwell and earlier (diff)
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 23
-rw-r--r-- | src/shader_recompiler/backend/spirv/spirv_emit_context.h | 36
-rw-r--r-- | src/shader_recompiler/profile.h | 4
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 6
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 35
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 18
6 files changed, 97 insertions, 25 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 2868fc57d..1d77426e0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -111,16 +111,33 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr,
     } else if (element_size > 1) {
         const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
         const Id shift{ctx.Const(log2_element_size)};
-        buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
+        buffer_offset = ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), shift);
     } else {
         buffer_offset = ctx.Def(offset);
     }
     if (!binding.IsImmediate()) {
         return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset);
     }
+
     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
     const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)};
-    return ctx.OpLoad(result_type, access_chain);
+    const Id val = ctx.OpLoad(result_type, access_chain);
+
+    if (offset.IsImmediate() || !ctx.profile.has_broken_robust) {
+        return val;
+    }
+
+    const auto is_float = UniformDefinitions::IsFloat(member_ptr);
+    const auto num_elements = UniformDefinitions::NumElements(member_ptr);
+    const std::array zero_vec{
+        is_float ? ctx.Const(0.0f) : ctx.Const(0u),
+        is_float ? ctx.Const(0.0f) : ctx.Const(0u),
+        is_float ? ctx.Const(0.0f) : ctx.Const(0u),
+        is_float ? ctx.Const(0.0f) : ctx.Const(0u),
+    };
+    const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu));
+    const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements));
+    return ctx.OpSelect(result_type, cond, val, zero);
 }
 
 Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
@@ -138,7 +155,7 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde
         const u32 element{(offset.U32() / 4) % 4 + index_offset};
         return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
     }
-    const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
+    const Id shift{ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
     Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
     if (index_offset > 0) {
         element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 7c49fd504..1aa79863d 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -64,6 +64,42 @@ struct UniformDefinitions {
     Id F32{};
     Id U32x2{};
     Id U32x4{};
+
+    constexpr static size_t NumElements(Id UniformDefinitions::*member_ptr) {
+        if (member_ptr == &UniformDefinitions::U8) {
+            return 1;
+        }
+        if (member_ptr == &UniformDefinitions::S8) {
+            return 1;
+        }
+        if (member_ptr == &UniformDefinitions::U16) {
+            return 1;
+        }
+        if (member_ptr == &UniformDefinitions::S16) {
+            return 1;
+        }
+        if (member_ptr == &UniformDefinitions::U32) {
+            return 1;
+        }
+        if (member_ptr == &UniformDefinitions::F32) {
+            return 1;
+        }
+        if (member_ptr == &UniformDefinitions::U32x2) {
+            return 2;
+        }
+        if (member_ptr == &UniformDefinitions::U32x4) {
+            return 4;
+        }
+        ASSERT(false);
+        return 1;
+    }
+
+    constexpr static bool IsFloat(Id UniformDefinitions::*member_ptr) {
+        if (member_ptr == &UniformDefinitions::F32) {
+            return true;
+        }
+        return false;
+    }
 };
 
 struct StorageTypeDefinition {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 9ca97f6a4..38d820db2 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -9,7 +9,6 @@ namespace Shader {
 
 struct Profile {
     u32 supported_spirv{0x00010000};
 
-    bool unified_descriptor_binding{};
     bool support_descriptor_aliasing{};
     bool support_int8{};
@@ -82,6 +81,9 @@ struct Profile {
     bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
 
     u32 gl_max_compute_smem_size{};
+
+    /// Maxwell and earlier nVidia architectures have broken robust support
+    bool has_broken_robust{};
 };
 
 } // namespace Shader
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a1ec1a100..804b95989 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
         .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
         .ignore_nan_fp_comparisons = false,
         .has_broken_spirv_subgroup_mask_vector_extract_dynamic =
-            driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
+            driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
+        .has_broken_robust =
+            device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell,
+    };
+
     host_info = Shader::HostTranslateInfo{
         .support_float64 = device.IsFloat64Supported(),
         .support_float16 = device.IsFloat16Supported(),
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 876cec2e8..e518756d2 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
 
 } // namespace Alternatives
 
-enum class NvidiaArchitecture {
-    KeplerOrOlder,
-    Maxwell,
-    Pascal,
-    Volta,
-    Turing,
-    AmpereOrNewer,
-};
-
 template <typename T>
 void SetNext(void**& next, T& data) {
     *next = &data;
@@ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
         if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
             // Only Ampere and newer support this feature
            // TODO: Find a way to differentiate Ampere and Ada
-            return NvidiaArchitecture::AmpereOrNewer;
+            return NvidiaArchitecture::Arch_AmpereOrNewer;
         }
-        return NvidiaArchitecture::Turing;
+        return NvidiaArchitecture::Arch_Turing;
     }
 
     if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) {
@@ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
         physical_properties.pNext = &advanced_blending_props;
         physical.GetProperties2(physical_properties);
         if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) {
-            return NvidiaArchitecture::Maxwell;
+            return NvidiaArchitecture::Arch_Maxwell;
         }
 
         if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) {
@@ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
             physical_properties.pNext = &conservative_raster_props;
             physical.GetProperties2(physical_properties);
             if (conservative_raster_props.degenerateLinesRasterized) {
-                return NvidiaArchitecture::Volta;
+                return NvidiaArchitecture::Arch_Volta;
             }
-            return NvidiaArchitecture::Pascal;
+            return NvidiaArchitecture::Arch_Pascal;
         }
     }
 
-    return NvidiaArchitecture::KeplerOrOlder;
+    return NvidiaArchitecture::Arch_KeplerOrOlder;
 }
 
 std::vector<const char*> ExtensionListForVulkan(
@@ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
     }
 
+    if (is_nvidia) {
+        nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions);
+    }
+
     SetupFamilies(surface);
     const auto queue_cis = GetDeviceQueueCreateInfos();
 
@@ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
 
     if (is_nvidia) {
         const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
-        const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
-        if (arch >= NvidiaArchitecture::AmpereOrNewer) {
+        const auto arch = GetNvidiaArch();
+        if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) {
             LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
             features.shader_float16_int8.shaderFloat16 = false;
-        } else if (arch <= NvidiaArchitecture::Volta) {
+        } else if (arch <= NvidiaArchitecture::Arch_Volta) {
             if (nv_major_version < 527) {
                 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
                 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
         }
     } else if (extensions.push_descriptor && is_nvidia) {
-        const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
-        if (arch <= NvidiaArchitecture::Pascal) {
+        const auto arch = GetNvidiaArch();
+        if (arch <= NvidiaArchitecture::Arch_Pascal) {
             LOG_WARNING(Render_Vulkan,
                         "Pascal and older architectures have broken VK_KHR_push_descriptor");
             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 282a2925d..b213ed7dd 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer };
 /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
 const u32 GuestWarpSize = 32;
 
+enum class NvidiaArchitecture {
+    Arch_KeplerOrOlder,
+    Arch_Maxwell,
+    Arch_Pascal,
+    Arch_Volta,
+    Arch_Turing,
+    Arch_AmpereOrNewer,
+};
+
 /// Handles data specific to a physical device.
 class Device {
 public:
@@ -670,6 +679,14 @@ public:
         return false;
     }
 
+    bool IsNvidia() const noexcept {
+        return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
+    }
+
+    NvidiaArchitecture GetNvidiaArch() const noexcept {
+        return nvidia_arch;
+    }
+
 private:
     /// Checks if the physical device is suitable and configures the object state
     /// with all necessary info about its properties.
@@ -788,6 +805,7 @@ private:
     bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
     u64 device_access_memory{}; ///< Total size of device local memory in bytes.
    u32 sets_per_pool{}; ///< Sets per Description Pool
+    NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
 
     // Telemetry parameters
     std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
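The core of the change is the manual robustness path in GetCbuf above: when the constant-buffer offset is dynamic and the profile reports has_broken_robust, the emitter still loads the element, then emits OpSelect between the loaded value and a zero vector, keyed on OpULessThanEqual(buffer_offset, 0xFFFFu). What follows is a minimal, standalone C++ sketch of that guard; it is not part of the commit, and the buffer, bound constant, and function names are illustrative only.

#include <array>
#include <cstdint>
#include <cstdio>
#include <span>

// Illustrative stand-in for the 0xFFFF element bound used by the emitted check.
constexpr std::uint32_t kCbufElementBound = 0xFFFFu;

// Host-side analogue of the generated SPIR-V: load, then select between the
// loaded value and zero depending on whether the dynamic offset is in range.
std::uint32_t LoadCbufU32(std::span<const std::uint32_t> cbuf, std::uint32_t element_offset) {
    // The shader performs the load unconditionally (OpAccessChain + OpLoad);
    // on the host we wrap the index so the sketch itself stays well defined.
    const std::uint32_t raw = cbuf[element_offset % cbuf.size()];
    // Manual robustness: in-range keeps the loaded value, out-of-range yields zero
    // (OpULessThanEqual feeding OpSelect in the emitted code).
    const bool in_range = element_offset <= kCbufElementBound;
    return in_range ? raw : 0u;
}

int main() {
    const std::array<std::uint32_t, 4> cbuf{11u, 22u, 33u, 44u};
    std::printf("%u\n", LoadCbufU32(cbuf, 2u));       // in range, prints 33
    std::printf("%u\n", LoadCbufU32(cbuf, 0x10000u)); // past the bound, prints 0
    return 0;
}

On drivers with working robustBufferAccess the hardware already bounds such reads, which is why the guard is emitted only when has_broken_robust is set (Nvidia Maxwell and earlier in this commit).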