diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/gpu.cpp | 10 | ||||
-rw-r--r-- | src/video_core/gpu.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 13 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 9 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 39 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 2 | ||||
-rw-r--r-- | src/video_core/shader_environment.cpp | 54 | ||||
-rw-r--r-- | src/video_core/shader_environment.h | 6 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 10 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 9 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 5 |
13 files changed, 147 insertions, 33 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index f524f8bae..705765c99 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -311,6 +311,12 @@ struct GPU::Impl { cpu_context->MakeCurrent(); } + void NotifyShutdown() { + std::unique_lock lk{sync_mutex}; + shutting_down.store(true, std::memory_order::relaxed); + sync_cv.notify_all(); + } + /// Obtain the CPU Context void ObtainContext() { cpu_context->MakeCurrent(); @@ -858,6 +864,10 @@ void GPU::Start() { impl->Start(); } +void GPU::NotifyShutdown() { + impl->NotifyShutdown(); +} + void GPU::ObtainContext() { impl->ObtainContext(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 500411176..3188b83ed 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -232,6 +232,9 @@ public: /// core timing events. void Start(); + /// Performs any additional necessary steps to shutdown GPU emulation. + void NotifyShutdown(); + /// Obtain the CPU Context void ObtainContext(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 0764ea6e0..e62912a22 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -182,17 +182,13 @@ Device::Device() { shader_backend = Settings::ShaderBackend::GLSL; } - if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia && - !Settings::values.renderer_debug) { + if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) { const std::string_view driver_version = version.substr(13); const int version_major = std::atoi(driver_version.substr(0, driver_version.find(".")).data()); - if (version_major >= 495) { - LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems " - "with yuzu. Forcing GLASM as a mitigation."); - shader_backend = Settings::ShaderBackend::GLASM; - use_assembly_shaders = true; + has_cbuf_ftou_bug = true; + has_bool_ref_bug = true; } } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de9e41659..95c2e8d38 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -152,6 +152,14 @@ public: return need_fastmath_off; } + bool HasCbufFtouBug() const { + return has_cbuf_ftou_bug; + } + + bool HasBoolRefBug() const { + return has_bool_ref_bug; + } + Settings::ShaderBackend GetShaderBackend() const { return shader_backend; } @@ -200,6 +208,8 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + bool has_cbuf_ftou_bug{}; + bool has_bool_ref_bug{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 29c6e1a5f..f71e01a34 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,6 +214,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_fp16_float_controls = false, .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), + .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(), + .has_gl_bool_ref_bug = device.HasBoolRefBug(), .ignore_nan_fp_comparisons = true, .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, @@ -423,6 +425,11 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + + if (Settings::values.dump_shaders) { + env.Dump(key.unique_hashes[index]); + } + if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -509,8 +516,12 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + if (Settings::values.dump_shaders) { + env.Dump(key.Hash()); + } + + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2728353c8..a633b73e5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -517,6 +517,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (Settings::values.dump_shaders) { + env.Dump(key.unique_hashes[index]); + } if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -613,6 +616,12 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + + // Dump it before error. + if (Settings::values.dump_shaders) { + env.Dump(key.Hash()); + } + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector<u32> code{EmitSPIRV(profile, program)}; device.SaveShader(code); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index c3050887c..0ba56ff1e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1344,7 +1344,6 @@ bool Image::ScaleUp(bool ignore) { return false; } has_scaled = true; - const auto& device = runtime->device; if (!scaled_image) { const bool is_2d = info.type == ImageType::e2D; const u32 scaled_width = resolution.ScaleUp(info.size.width); @@ -1352,7 +1351,7 @@ bool Image::ScaleUp(bool ignore) { auto scaled_info = info; scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; - scaled_image = MakeImage(device, scaled_info); + scaled_image = MakeImage(runtime->device, scaled_info); auto& allocator = runtime->memory_allocator; scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); ignore = false; @@ -1361,18 +1360,13 @@ bool Image::ScaleUp(bool ignore) { if (ignore) { return true; } - if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; - const PixelFormat format = StorageFormat(info.format); - const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; - const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); - } else { + if (NeedsScaleHelper()) { return BlitScaleHelper(true); + } else { + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); } return true; } @@ -1394,15 +1388,10 @@ bool Image::ScaleDown(bool ignore) { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; - const PixelFormat format = StorageFormat(info.format); - const auto& device = runtime->device; - const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; - const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); - } else { + if (NeedsScaleHelper()) { return BlitScaleHelper(false); + } else { + BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); } return true; } @@ -1470,6 +1459,20 @@ bool Image::BlitScaleHelper(bool scale_up) { return true; } +bool Image::NeedsScaleHelper() const { + const auto& device = runtime->device; + const bool needs_msaa_helper = info.num_samples > 1 && device.CantBlitMSAA(); + if (needs_msaa_helper) { + return true; + } + static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; + const PixelFormat format = StorageFormat(info.format); + const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const bool needs_blit_helper = !device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT); + return needs_blit_helper; +} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 2f12be78b..c81130dd2 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -149,6 +149,8 @@ public: private: bool BlitScaleHelper(bool scale_up); + bool NeedsScaleHelper() const; + VKScheduler* scheduler{}; TextureCacheRuntime* runtime{}; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 05850afd0..7d3ae0de4 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <bit> #include <filesystem> #include <fstream> #include <memory> @@ -14,6 +15,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "shader_recompiler/environment.h" #include "video_core/engines/kepler_compute.h" @@ -57,6 +59,47 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { } } +static std::string_view StageToPrefix(Shader::Stage stage) { + switch (stage) { + case Shader::Stage::VertexB: + return "VB"; + case Shader::Stage::TessellationControl: + return "TC"; + case Shader::Stage::TessellationEval: + return "TE"; + case Shader::Stage::Geometry: + return "GS"; + case Shader::Stage::Fragment: + return "FS"; + case Shader::Stage::Compute: + return "CS"; + case Shader::Stage::VertexA: + return "VA"; + default: + return "UK"; + } +} + +static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest, + u32 initial_offset, Shader::Stage stage) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; + const auto base_dir{shader_dir / "shaders"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories"); + return; + } + const auto prefix = StageToPrefix(stage); + const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)}; + const size_t real_size = read_highest - read_lowest + initial_offset; + const size_t padding_needed = ((32 - (real_size % 32)) % 32); + std::fstream shader_file(name, std::ios::out | std::ios::binary); + const size_t jump_index = initial_offset / sizeof(u64); + shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size); + for (size_t i = 0; i < padding_needed; i++) { + shader_file.put(0); + } +} + GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, u32 start_address_) : gpu_memory{&gpu_memory_}, program_base{program_base_} { @@ -128,6 +171,10 @@ u64 GenericEnvironment::CalculateHash() const { return Common::CityHash64(data.get(), size); } +void GenericEnvironment::Dump(u64 hash) { + DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage); +} + void GenericEnvironment::Serialize(std::ofstream& file) const { const u64 code_size{static_cast<u64>(CachedSize())}; const u64 num_texture_types{static_cast<u64>(texture_types.size())}; @@ -207,6 +254,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + initial_offset = sizeof(sph); gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -323,14 +371,20 @@ void FileEnvironment::Deserialize(std::ifstream& file) { if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); + initial_offset = 0; } else { file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); + initial_offset = sizeof(sph); if (stage == Shader::Stage::Geometry) { file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); } } } +void FileEnvironment::Dump(u64 [[maybe_unused]] hash) { + DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage); +} + u64 FileEnvironment::ReadInstruction(u32 address) { if (address < read_lowest || address > read_highest) { throw Shader::LogicError("Out of bounds address {}", address); diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 6640e53d0..aae762b27 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -57,6 +57,8 @@ public: [[nodiscard]] u64 CalculateHash() const; + void Dump(u64 hash) override; + void Serialize(std::ofstream& file) const; protected: @@ -82,6 +84,7 @@ protected: u32 cached_lowest = std::numeric_limits<u32>::max(); u32 cached_highest = 0; + u32 initial_offset = 0; bool has_unbound_instructions = false; }; @@ -149,6 +152,8 @@ public: [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; + void Dump(u64 hash) override; + private: std::unique_ptr<u64[]> code; std::unordered_map<u32, Shader::TextureType> texture_types; @@ -159,6 +164,7 @@ private: u32 texture_bound{}; u32 read_lowest{}; u32 read_highest{}; + u32 initial_offset{}; }; void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 7bd31b211..d8e19cb2f 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -364,14 +364,14 @@ template <u32 GOB_EXTENT> [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D( const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { - const u32 layer_stride = new_info.layer_stride; - const s32 new_size = layer_stride * new_info.resources.layers; - const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr); + const u64 layer_stride = new_info.layer_stride; + const u64 new_size = layer_stride * new_info.resources.layers; + const u64 diff = overlap.gpu_addr - gpu_addr; if (diff > new_size) { return std::nullopt; } - const s32 base_layer = diff / layer_stride; - const s32 mip_offset = diff % layer_stride; + const s32 base_layer = static_cast<s32>(diff / layer_stride); + const s32 mip_offset = static_cast<s32>(diff % layer_stride); const std::array offsets = CalculateMipLevelOffsets(new_info); const auto end = offsets.begin() + new_info.resources.levels; const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset)); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9862b815b..3d78efddc 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -638,15 +638,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } } - if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; + if (ext_vertex_input_dynamic_state && is_intel_windows) { LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); ext_vertex_input_dynamic_state = false; } - if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + if (is_float16_supported && is_intel_windows) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); is_float16_supported = false; } + if (is_intel_windows) { + LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); + cant_blit_msaa = true; + } supports_d24_depth = IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4c9d86aad..37d140ebd 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -350,6 +350,10 @@ public: return supports_d24_depth; } + bool CantBlitMSAA() const { + return cant_blit_msaa; + } + private: /// Checks if the physical device is suitable. void CheckSuitability(bool requires_swapchain) const; @@ -443,6 +447,7 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. // Telemetry parameters std::string vendor_name; ///< Device's driver name. |