summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/gpu.cpp10
-rw-r--r--src/video_core/gpu.h3
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_device.h10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp39
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h2
-rw-r--r--src/video_core/shader_environment.cpp54
-rw-r--r--src/video_core/shader_environment.h6
-rw-r--r--src/video_core/texture_cache/util.cpp10
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp9
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h5
13 files changed, 147 insertions, 33 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index f524f8bae..705765c99 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -311,6 +311,12 @@ struct GPU::Impl {
cpu_context->MakeCurrent();
}
+ void NotifyShutdown() {
+ std::unique_lock lk{sync_mutex};
+ shutting_down.store(true, std::memory_order::relaxed);
+ sync_cv.notify_all();
+ }
+
/// Obtain the CPU Context
void ObtainContext() {
cpu_context->MakeCurrent();
@@ -858,6 +864,10 @@ void GPU::Start() {
impl->Start();
}
+void GPU::NotifyShutdown() {
+ impl->NotifyShutdown();
+}
+
void GPU::ObtainContext() {
impl->ObtainContext();
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 500411176..3188b83ed 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -232,6 +232,9 @@ public:
/// core timing events.
void Start();
+ /// Performs any additional necessary steps to shutdown GPU emulation.
+ void NotifyShutdown();
+
/// Obtain the CPU Context
void ObtainContext();
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 0764ea6e0..e62912a22 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -182,17 +182,13 @@ Device::Device() {
shader_backend = Settings::ShaderBackend::GLSL;
}
- if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia &&
- !Settings::values.renderer_debug) {
+ if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
const std::string_view driver_version = version.substr(13);
const int version_major =
std::atoi(driver_version.substr(0, driver_version.find(".")).data());
-
if (version_major >= 495) {
- LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems "
- "with yuzu. Forcing GLASM as a mitigation.");
- shader_backend = Settings::ShaderBackend::GLASM;
- use_assembly_shaders = true;
+ has_cbuf_ftou_bug = true;
+ has_bool_ref_bug = true;
}
}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de9e41659..95c2e8d38 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -152,6 +152,14 @@ public:
return need_fastmath_off;
}
+ bool HasCbufFtouBug() const {
+ return has_cbuf_ftou_bug;
+ }
+
+ bool HasBoolRefBug() const {
+ return has_bool_ref_bug;
+ }
+
Settings::ShaderBackend GetShaderBackend() const {
return shader_backend;
}
@@ -200,6 +208,8 @@ private:
bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
+ bool has_cbuf_ftou_bug{};
+ bool has_bool_ref_bug{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 29c6e1a5f..f71e01a34 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.has_broken_fp16_float_controls = false,
.has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
.has_gl_precise_bug = device.HasPreciseBug(),
+ .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
+ .has_gl_bool_ref_bug = device.HasBoolRefBug(),
.ignore_nan_fp_comparisons = true,
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
},
@@ -423,6 +425,11 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.unique_hashes[index]);
+ }
+
if (!uses_vertex_a || index != 1) {
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -509,8 +516,12 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
- auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.Hash());
+ }
+
+ auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
Shader::RuntimeInfo info;
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 2728353c8..a633b73e5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -517,6 +517,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.unique_hashes[index]);
+ }
if (!uses_vertex_a || index != 1) {
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -613,6 +616,12 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+
+ // Dump it before error.
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.Hash());
+ }
+
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const std::vector<u32> code{EmitSPIRV(profile, program)};
device.SaveShader(code);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index c3050887c..0ba56ff1e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1344,7 +1344,6 @@ bool Image::ScaleUp(bool ignore) {
return false;
}
has_scaled = true;
- const auto& device = runtime->device;
if (!scaled_image) {
const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
@@ -1352,7 +1351,7 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
- scaled_image = MakeImage(device, scaled_info);
+ scaled_image = MakeImage(runtime->device, scaled_info);
auto& allocator = runtime->memory_allocator;
scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
ignore = false;
@@ -1361,18 +1360,13 @@ bool Image::ScaleUp(bool ignore) {
if (ignore) {
return true;
}
-
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
- static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
- const PixelFormat format = StorageFormat(info.format);
- const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
- const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
- BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
- } else {
+ if (NeedsScaleHelper()) {
return BlitScaleHelper(true);
+ } else {
+ BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
}
return true;
}
@@ -1394,15 +1388,10 @@ bool Image::ScaleDown(bool ignore) {
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
- static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
- const PixelFormat format = StorageFormat(info.format);
- const auto& device = runtime->device;
- const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
- const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
- BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
- } else {
+ if (NeedsScaleHelper()) {
return BlitScaleHelper(false);
+ } else {
+ BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
}
return true;
}
@@ -1470,6 +1459,20 @@ bool Image::BlitScaleHelper(bool scale_up) {
return true;
}
+bool Image::NeedsScaleHelper() const {
+ const auto& device = runtime->device;
+ const bool needs_msaa_helper = info.num_samples > 1 && device.CantBlitMSAA();
+ if (needs_msaa_helper) {
+ return true;
+ }
+ static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+ const PixelFormat format = StorageFormat(info.format);
+ const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+ const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ const bool needs_blit_helper = !device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT);
+ return needs_blit_helper;
+}
+
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 2f12be78b..c81130dd2 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -149,6 +149,8 @@ public:
private:
bool BlitScaleHelper(bool scale_up);
+ bool NeedsScaleHelper() const;
+
VKScheduler* scheduler{};
TextureCacheRuntime* runtime{};
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 05850afd0..7d3ae0de4 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <bit>
#include <filesystem>
#include <fstream>
#include <memory>
@@ -14,6 +15,7 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "shader_recompiler/environment.h"
#include "video_core/engines/kepler_compute.h"
@@ -57,6 +59,47 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
}
}
+static std::string_view StageToPrefix(Shader::Stage stage) {
+ switch (stage) {
+ case Shader::Stage::VertexB:
+ return "VB";
+ case Shader::Stage::TessellationControl:
+ return "TC";
+ case Shader::Stage::TessellationEval:
+ return "TE";
+ case Shader::Stage::Geometry:
+ return "GS";
+ case Shader::Stage::Fragment:
+ return "FS";
+ case Shader::Stage::Compute:
+ return "CS";
+ case Shader::Stage::VertexA:
+ return "VA";
+ default:
+ return "UK";
+ }
+}
+
+static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest,
+ u32 initial_offset, Shader::Stage stage) {
+ const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
+ const auto base_dir{shader_dir / "shaders"};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories");
+ return;
+ }
+ const auto prefix = StageToPrefix(stage);
+ const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)};
+ const size_t real_size = read_highest - read_lowest + initial_offset;
+ const size_t padding_needed = ((32 - (real_size % 32)) % 32);
+ std::fstream shader_file(name, std::ios::out | std::ios::binary);
+ const size_t jump_index = initial_offset / sizeof(u64);
+ shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size);
+ for (size_t i = 0; i < padding_needed; i++) {
+ shader_file.put(0);
+ }
+}
+
GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
u32 start_address_)
: gpu_memory{&gpu_memory_}, program_base{program_base_} {
@@ -128,6 +171,10 @@ u64 GenericEnvironment::CalculateHash() const {
return Common::CityHash64(data.get(), size);
}
+void GenericEnvironment::Dump(u64 hash) {
+ DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage);
+}
+
void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSize())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
@@ -207,6 +254,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
u32 start_address_)
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
+ initial_offset = sizeof(sph);
gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -323,14 +371,20 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
if (stage == Shader::Stage::Compute) {
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
+ initial_offset = 0;
} else {
file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
+ initial_offset = sizeof(sph);
if (stage == Shader::Stage::Geometry) {
file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
}
}
}
+void FileEnvironment::Dump(u64 [[maybe_unused]] hash) {
+ DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage);
+}
+
u64 FileEnvironment::ReadInstruction(u32 address) {
if (address < read_lowest || address > read_highest) {
throw Shader::LogicError("Out of bounds address {}", address);
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 6640e53d0..aae762b27 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -57,6 +57,8 @@ public:
[[nodiscard]] u64 CalculateHash() const;
+ void Dump(u64 hash) override;
+
void Serialize(std::ofstream& file) const;
protected:
@@ -82,6 +84,7 @@ protected:
u32 cached_lowest = std::numeric_limits<u32>::max();
u32 cached_highest = 0;
+ u32 initial_offset = 0;
bool has_unbound_instructions = false;
};
@@ -149,6 +152,8 @@ public:
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
+ void Dump(u64 hash) override;
+
private:
std::unique_ptr<u64[]> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
@@ -159,6 +164,7 @@ private:
u32 texture_bound{};
u32 read_lowest{};
u32 read_highest{};
+ u32 initial_offset{};
};
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 7bd31b211..d8e19cb2f 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -364,14 +364,14 @@ template <u32 GOB_EXTENT>
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
- const u32 layer_stride = new_info.layer_stride;
- const s32 new_size = layer_stride * new_info.resources.layers;
- const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
+ const u64 layer_stride = new_info.layer_stride;
+ const u64 new_size = layer_stride * new_info.resources.layers;
+ const u64 diff = overlap.gpu_addr - gpu_addr;
if (diff > new_size) {
return std::nullopt;
}
- const s32 base_layer = diff / layer_stride;
- const s32 mip_offset = diff % layer_stride;
+ const s32 base_layer = static_cast<s32>(diff / layer_stride);
+ const s32 mip_offset = static_cast<s32>(diff % layer_stride);
const std::array offsets = CalculateMipLevelOffsets(new_info);
const auto end = offsets.begin() + new_info.resources.levels;
const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 9862b815b..3d78efddc 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -638,15 +638,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
}
- if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
+ if (ext_vertex_input_dynamic_state && is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
}
- if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ if (is_float16_supported && is_intel_windows) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
is_float16_supported = false;
}
+ if (is_intel_windows) {
+ LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
+ cant_blit_msaa = true;
+ }
supports_d24_depth =
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4c9d86aad..37d140ebd 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -350,6 +350,10 @@ public:
return supports_d24_depth;
}
+ bool CantBlitMSAA() const {
+ return cant_blit_msaa;
+ }
+
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -443,6 +447,7 @@ private:
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
+ bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.