From 894cc9d876a70947aecc7a1a3f9ef869e8088f42 Mon Sep 17 00:00:00 2001 From: Feng Chen Date: Wed, 17 Nov 2021 12:21:17 +0800 Subject: Fix image update/download error when width too small --- .../renderer_opengl/gl_texture_cache.cpp | 27 ++++++++++++++-------- src/video_core/renderer_opengl/gl_texture_cache.h | 1 + 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2f7d98d8b..5cfb6bb8a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -317,13 +317,12 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } -OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { +OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format, + GLsizei gl_num_levels) { const GLenum target = ImageTarget(info); const GLsizei width = info.size.width; const GLsizei height = info.size.height; const GLsizei depth = info.size.depth; - const int max_host_mip_levels = std::bit_width(info.size.width); - const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); const GLsizei num_layers = info.resources.layers; const GLsizei num_samples = info.num_samples; @@ -335,10 +334,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form } switch (target) { case GL_TEXTURE_1D_ARRAY: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); + glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, num_layers); break; case GL_TEXTURE_2D_ARRAY: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); + glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, num_layers); break; case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { // TODO: Where should 'fixedsamplelocations' 
come from? @@ -348,10 +347,10 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form break; } case GL_TEXTURE_RECTANGLE: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); + glTextureStorage2D(handle, gl_num_levels, gl_internal_format, width, height); break; case GL_TEXTURE_3D: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); + glTextureStorage3D(handle, gl_num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: UNREACHABLE(); @@ -686,7 +685,9 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_format = tuple.format; gl_type = tuple.type; } - texture = MakeImage(info, gl_internal_format); + const int max_host_mip_levels = std::bit_width(info.size.width); + gl_num_levels = std::min(info.resources.levels, max_host_mip_levels); + texture = MakeImage(info, gl_internal_format, gl_num_levels); current_texture = texture.handle; if (runtime->device.HasDebuggingToolAttached()) { const std::string name = VideoCommon::Name(*this); @@ -714,6 +715,9 @@ void Image::UploadMemory(const ImageBufferMap& map, u32 current_image_height = std::numeric_limits::max(); for (const VideoCommon::BufferImageCopy& copy : copies) { + if (copy.image_subresource.base_level >= gl_num_levels) { + continue; + } if (current_row_length != copy.buffer_row_length) { current_row_length = copy.buffer_row_length; glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); @@ -743,6 +747,9 @@ void Image::DownloadMemory(ImageBufferMap& map, u32 current_image_height = std::numeric_limits::max(); for (const VideoCommon::BufferImageCopy& copy : copies) { + if (copy.image_subresource.base_level >= gl_num_levels) { + continue; + } if (current_row_length != copy.buffer_row_length) { current_row_length = copy.buffer_row_length; glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); @@ -782,7 +789,7 @@ GLuint Image::StorageHandle() noexcept { } store_view.Create(); 
glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, - info.resources.levels, 0, info.resources.layers); + gl_num_levels, 0, info.resources.layers); return store_view.handle; default: return current_texture; @@ -946,7 +953,7 @@ void Image::Scale(bool up_scale) { auto dst_info = info; dst_info.size.width = scaled_width; dst_info.size.height = scaled_height; - upscaled_backup = MakeImage(dst_info, gl_internal_format); + upscaled_backup = MakeImage(dst_info, gl_internal_format, gl_num_levels); } const u32 src_width = up_scale ? original_width : scaled_width; const u32 src_height = up_scale ? original_height : scaled_height; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1bb762568..30037a6a2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -219,6 +219,7 @@ private: GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; + GLsizei gl_num_levels{}; TextureCacheRuntime* runtime{}; GLuint current_texture{}; }; -- cgit v1.2.3 From 2348eb41f38a6e52e52d121adfc4c605763209a7 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 17 Nov 2021 15:04:38 -0500 Subject: video_core: Add S8_UINT stencil format --- src/video_core/gpu.h | 1 + src/video_core/surface.cpp | 7 +++++++ src/video_core/surface.h | 14 +++++++++++--- src/video_core/texture_cache/formatter.h | 2 ++ 4 files changed, 21 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 05e5c94f3..c89a5d693 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -83,6 +83,7 @@ enum class DepthFormat : u32 { S8_UINT_Z24_UNORM = 0x14, D24X8_UNORM = 0x15, D24S8_UNORM = 0x16, + S8_UINT = 0x17, D24C8_UNORM = 0x18, D32_FLOAT_S8X24_UINT = 0x19, }; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp 
index 58d262446..a36015c8c 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -82,6 +82,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { return PixelFormat::D32_FLOAT; case Tegra::DepthFormat::D16_UNORM: return PixelFormat::D16_UNORM; + case Tegra::DepthFormat::S8_UINT: + return PixelFormat::S8_UINT; case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: return PixelFormat::D32_FLOAT_S8_UINT; default: @@ -213,6 +215,11 @@ SurfaceType GetFormatType(PixelFormat pixel_format) { return SurfaceType::Depth; } + if (static_cast(pixel_format) < + static_cast(PixelFormat::MaxStencilFormat)) { + return SurfaceType::Stencil; + } + if (static_cast(pixel_format) < static_cast(PixelFormat::MaxDepthStencilFormat)) { return SurfaceType::DepthStencil; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 2ce7c7d33..33e8d24ab 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -110,8 +110,12 @@ enum class PixelFormat { MaxDepthFormat, + // Stencil formats + S8_UINT = MaxDepthFormat, + MaxStencilFormat, + // DepthStencil formats - D24_UNORM_S8_UINT = MaxDepthFormat, + D24_UNORM_S8_UINT = MaxStencilFormat, S8_UINT_D24_UNORM, D32_FLOAT_S8_UINT, @@ -125,8 +129,9 @@ constexpr std::size_t MaxPixelFormat = static_cast(PixelFormat::Max enum class SurfaceType { ColorTexture = 0, Depth = 1, - DepthStencil = 2, - Invalid = 3, + Stencil = 2, + DepthStencil = 3, + Invalid = 4, }; enum class SurfaceTarget { @@ -229,6 +234,7 @@ constexpr std::array BLOCK_WIDTH_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM 1, // D32_FLOAT_S8_UINT @@ -328,6 +334,7 @@ constexpr std::array BLOCK_HEIGHT_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM 1, // D32_FLOAT_S8_UINT @@ -427,6 +434,7 @@ constexpr std::array BITS_PER_BLOCK_TABLE = {{ 32, // E5B9G9R9_FLOAT 32, // D32_FLOAT 16, 
// D16_UNORM + 8, // S8_UINT 32, // D24_UNORM_S8_UINT 32, // S8_UINT_D24_UNORM 64, // D32_FLOAT_S8_UINT diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index c6cf0583f..b2c81057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -194,6 +194,8 @@ struct fmt::formatter : fmt::formatter Date: Wed, 17 Nov 2021 15:05:07 -0500 Subject: renderer_opengl: Implement S8_UINT stencil format --- .../renderer_opengl/gl_texture_cache.cpp | 26 ++++++++++++++++++---- src/video_core/renderer_opengl/gl_texture_cache.h | 4 ++-- src/video_core/renderer_opengl/maxwell_to_gl.h | 1 + 3 files changed, 25 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2f7d98d8b..d46ebd3ea 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -148,6 +148,8 @@ GLenum AttachmentType(PixelFormat format) { switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { case SurfaceType::Depth: return GL_DEPTH_ATTACHMENT; + case SurfaceType::Stencil: + return GL_STENCIL_ATTACHMENT; case SurfaceType::DepthStencil: return GL_DEPTH_STENCIL_ATTACHMENT; default: @@ -897,6 +899,8 @@ void Image::Scale(bool up_scale) { return GL_COLOR_ATTACHMENT0; case SurfaceType::Depth: return GL_DEPTH_ATTACHMENT; + case SurfaceType::Stencil: + return GL_STENCIL_ATTACHMENT; case SurfaceType::DepthStencil: return GL_DEPTH_STENCIL_ATTACHMENT; default: @@ -910,8 +914,10 @@ void Image::Scale(bool up_scale) { return GL_COLOR_BUFFER_BIT; case SurfaceType::Depth: return GL_DEPTH_BUFFER_BIT; + case SurfaceType::Stencil: + return GL_STENCIL_BUFFER_BIT; case SurfaceType::DepthStencil: - return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT; + return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; default: UNREACHABLE(); 
return GL_COLOR_BUFFER_BIT; @@ -923,8 +929,10 @@ void Image::Scale(bool up_scale) { return 0; case SurfaceType::Depth: return 1; - case SurfaceType::DepthStencil: + case SurfaceType::Stencil: return 2; + case SurfaceType::DepthStencil: + return 3; default: UNREACHABLE(); return 0; @@ -1254,10 +1262,20 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanformat) == SurfaceType::DepthStencil) { + switch (GetFormatType(image_view->format)) { + case SurfaceType::Depth: + buffer_bits |= GL_DEPTH_BUFFER_BIT; + break; + case SurfaceType::Stencil: + buffer_bits |= GL_STENCIL_BUFFER_BIT; + break; + case SurfaceType::DepthStencil: buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } else { + break; + default: + UNREACHABLE(); buffer_bits |= GL_DEPTH_BUFFER_BIT; + break; } const GLenum attachment = AttachmentType(image_view->format); AttachTexture(handle, attachment, image_view); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1bb762568..16224e6b3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -162,8 +162,8 @@ private: std::array null_image_views{}; - std::array rescale_draw_fbos; - std::array rescale_read_fbos; + std::array rescale_draw_fbos; + std::array rescale_read_fbos; const Settings::ResolutionScalingInfo& resolution; }; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 39158aa3e..daba42ed9 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -108,6 +108,7 @@ constexpr std::array FORMAT_TAB {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT 
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, -- cgit v1.2.3 From dc61b7045b7ffc3cfe46f0b71f84d5fe709de6c9 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 17 Nov 2021 15:08:08 -0500 Subject: renderer_vulkan: Implement S8_UINT stencil format It should be noted that on Windows, only nvidia gpus support this format natively as of this commit. --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 3 +++ src/video_core/renderer_vulkan/vk_texture_cache.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.cpp | 10 ++++++++++ 3 files changed, 18 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 68a23b602..31adada56 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -208,6 +208,9 @@ struct FormatTuple { {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM + // Stencil formats + {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT + // DepthStencil formats {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 407fd2a15..9bc846b94 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -102,6 +102,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; break; case VideoCore::Surface::SurfaceType::Depth: + case VideoCore::Surface::SurfaceType::Stencil: case VideoCore::Surface::SurfaceType::DepthStencil: usage |= 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; break; @@ -173,6 +174,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return VK_IMAGE_ASPECT_COLOR_BIT; case VideoCore::Surface::SurfaceType::Depth: return VK_IMAGE_ASPECT_DEPTH_BIT; + case VideoCore::Surface::SurfaceType::Stencil: + return VK_IMAGE_ASPECT_STENCIL_BIT; case VideoCore::Surface::SurfaceType::DepthStencil: return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; default: @@ -195,6 +198,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { case PixelFormat::D16_UNORM: case PixelFormat::D32_FLOAT: return VK_IMAGE_ASPECT_DEPTH_BIT; + case PixelFormat::S8_UINT: + return VK_IMAGE_ASPECT_STENCIL_BIT; default: return VK_IMAGE_ASPECT_COLOR_BIT; } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 95106f88f..70c52aaac 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -21,6 +21,13 @@ namespace Vulkan { namespace { namespace Alternatives { +constexpr std::array STENCIL8_UINT{ + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_UNDEFINED, +}; + constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{ VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT, @@ -74,6 +81,8 @@ void SetNext(void**& next, T& data) { constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { switch (format) { + case VK_FORMAT_S8_UINT: + return Alternatives::STENCIL8_UINT.data(); case VK_FORMAT_D24_UNORM_S8_UINT: return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); case VK_FORMAT_D16_UNORM_S8_UINT: @@ -145,6 +154,7 @@ std::unordered_map GetFormatProperties(vk::Physica VK_FORMAT_R4G4B4A4_UNORM_PACK16, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D16_UNORM, + VK_FORMAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, -- cgit v1.2.3 From 
2ec7fcecb7d1f0bc8f943a3f7cb4d2e215bc4e76 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 03:17:02 +0100 Subject: Vulkan: implement D24S8 <-> RGBA8 convertions. --- src/video_core/host_shaders/CMakeLists.txt | 2 + .../host_shaders/convert_abgr8_to_d24s8.frag | 17 ++++ .../host_shaders/convert_d24s8_to_abgr8.frag | 21 +++++ src/video_core/renderer_vulkan/blit_image.cpp | 98 ++++++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.h | 16 ++++ .../renderer_vulkan/vk_texture_cache.cpp | 12 +++ 6 files changed, 166 insertions(+) create mode 100644 src/video_core/host_shaders/convert_abgr8_to_d24s8.frag create mode 100644 src/video_core/host_shaders/convert_d24s8_to_abgr8.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index d779a967a..fd3e41434 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -10,6 +10,8 @@ set(SHADER_FILES astc_decoder.comp block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp + convert_abgr8_to_d24s8.frag + convert_d24s8_to_abgr8.frag convert_depth_to_float.frag convert_float_to_depth.frag full_screen_triangle.vert diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag new file mode 100644 index 000000000..f7657e50a --- /dev/null +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -0,0 +1,17 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +// #extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uvec4 color = uvec4(texelFetch(color_texture, coord, 0).rgba * (exp2(8) - 1.0f)); + uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; + + gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); + // gl_FragStencilRefARB = int(color.a); +} diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag new file mode 100644 index 000000000..ff3bf8209 --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + + color.r = float(depth >> 16) / (exp2(8) - 1.0); + color.g = float((depth >> 8) & 0x00FF) / (exp2(8) - 1.0); + color.b = float(depth & 0x00FF) / (exp2(8) - 1.0); + color.a = float(stencil) / (exp2(8) - 1.0); +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b3884a4f5..01535d0c0 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,6 +4,8 @@ #include +#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include 
"video_core/host_shaders/full_screen_triangle_vert_spv.h" @@ -354,6 +356,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), + convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), + convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { if (device.IsExtShaderStencilExportSupported()) { @@ -448,6 +452,23 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } +void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_abgr8_to_d24s8_frag, true); + Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + +void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift) { + ConvertPipelineEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_abgr8_frag, false); + ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -495,6 +516,54 @@ void BlitImageHelper::Convert(VkPipeline 
pipeline, const Framebuffer* dst_frameb scheduler.InvalidateState(); } +void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_depth_view = src_image_view.DepthView(); + const VkImageView src_stencil_view = src_image_view.StencilView(); + const VkSampler sampler = *nearest_sampler; + const VkExtent2D extent{ + .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), + .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), + }; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale, + down_shift, this](vk::CommandBuffer cmdbuf) { + const VkOffset2D offset{ + .x = 0, + .y = 0, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 0.0f, + }; + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const PushConstants push_constants{ + .tex_scale = {viewport.width, viewport.height}, + .tex_offset = {0.0f, 0.0f}, + }; + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); + UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, + src_stencil_view); + // TODO: Barriers + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { const auto it = 
std::ranges::find(blit_color_keys, key); if (it != blit_color_keys.end()) { @@ -636,4 +705,33 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend }); } +void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *module); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = single_texture ? 
*one_texture_pipeline_layout : *two_textures_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index d77f76678..f754a7294 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -56,10 +56,19 @@ public: void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); + + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + u32 up_scale, u32 down_shift); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift); + [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); @@ -68,6 +77,9 @@ private: void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture); + const Device& device; VKScheduler& scheduler; StateTracker& state_tracker; @@ -83,6 +95,8 @@ private: vk::ShaderModule blit_depth_stencil_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; + vk::ShaderModule convert_abgr8_to_d24s8_frag; + vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -94,6 +108,8 @@ private: vk::Pipeline 
convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; + vk::Pipeline convert_abgr8_to_d24s8_pipeline; + vk::Pipeline convert_d24s8_to_abgr8_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 407fd2a15..6dfd45f31 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -881,6 +881,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); } break; + case PixelFormat::A8B8G8R8_UNORM: + case PixelFormat::B8G8R8A8_UNORM: + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); + } + break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); @@ -891,6 +897,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); } break; + case PixelFormat::S8_UINT_D24_UNORM: + if (src_view.format == PixelFormat::A8B8G8R8_UNORM || + src_view.format == PixelFormat::B8G8R8A8_UNORM) { + return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); + } + break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); -- cgit v1.2.3 From b130f648d7c629411c487722f864c6bafcd2562c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 03:17:54 +0100 Subject: TextureCache: Fix regression caused by ART and improve blit detection algorithm to be smarter. 
--- src/video_core/texture_cache/texture_cache.h | 9 +++------ src/video_core/texture_cache/util.cpp | 28 ++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 241f71a91..5ade3ce55 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -475,6 +475,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const BlitImages images = GetBlitImages(dst, src); const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); PrepareImage(dst_id, true, false); @@ -1094,12 +1095,8 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { continue; } - if (!dst_id) { - dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); - } - if (!src_id) { - src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); - } + src_id = FindOrInsertImage(src_info, src_addr); + dst_id = FindOrInsertImage(dst_info, dst_addr); } while (has_deleted_images); return BlitImages{ .dst_id = dst_id, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index ddc9fb13a..8f9eb387c 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1151,17 +1151,37 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src) { - if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + if (src) { src_info.format = src->info.format; + src_info.num_samples = src->info.num_samples; + src_info.size = src->info.size; } - if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + if (dst) { dst_info.format = dst->info.format; + dst_info.num_samples = dst->info.num_samples; + dst_info.size = dst->info.size; } if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { - dst_info.format = src->info.format; + if (dst) { + src_info.format = dst_info.format; + } else { + dst_info.format = src->info.format; + } } if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { - src_info.format = dst->info.format; + if (src) { + if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { + dst_info.format = src->info.format; + } + } else { + src_info.format = dst->info.format; + } + } + if (src_info.num_samples > 1) { + dst_info.format = src_info.format; + } + if (dst_info.num_samples > 1) { + src_info.format = dst_info.format; } } -- cgit v1.2.3 From
0ff228405faae92a39167b9aec072e14744eae35 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 05:46:57 +0100 Subject: TextureCache: force same image format when resolving an image. --- src/video_core/texture_cache/texture_cache.h | 10 ++++++++-- src/video_core/texture_cache/types.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5ade3ce55..06257f064 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -759,7 +759,8 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, return ImageId{}; } } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool broken_views = + runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews); const bool native_bgr = runtime.HasNativeBgr(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { @@ -1096,7 +1097,12 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( continue; } src_id = FindOrInsertImage(src_info, src_addr); - dst_id = FindOrInsertImage(dst_info, dst_addr); + RelaxedOptions dst_options{}; + if (src_info.num_samples > 1) { + // it's a resolve, we must enforce the same format. + dst_options = RelaxedOptions::ForceBrokenViews; + } + dst_id = FindOrInsertImage(dst_info, dst_addr, dst_options); } while (has_deleted_images); return BlitImages{ .dst_id = dst_id, diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 5c274abdf..5ac27b3a7 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 { Size = 1 << 0, Format = 1 << 1, Samples = 1 << 2, + ForceBrokenViews = 1 << 3, }; DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) -- cgit v1.2.3 From b805c7bf058c6da04620cf75880509bdf6d5986c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 06:27:44 +0100 Subject: TextureCache: Implement additional D24S8 conversions. 
--- src/video_core/host_shaders/CMakeLists.txt | 2 ++ .../host_shaders/convert_d24s8_to_b10g11r11.frag | 21 +++++++++++++++++++++ .../host_shaders/convert_d24s8_to_r16g16.frag | 21 +++++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.cpp | 22 ++++++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.h | 10 ++++++++++ .../renderer_vulkan/vk_texture_cache.cpp | 10 ++++++++++ 6 files changed, 86 insertions(+) create mode 100644 src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag create mode 100644 src/video_core/host_shaders/convert_d24s8_to_r16g16.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index fd3e41434..87042195a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -12,6 +12,8 @@ set(SHADER_FILES block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag convert_d24s8_to_abgr8.frag + convert_d24s8_to_b10g11r11.frag + convert_d24s8_to_r16g16.frag convert_depth_to_float.frag convert_float_to_depth.frag full_screen_triangle.vert diff --git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag new file mode 100644 index 000000000..c743d3a13 --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + + color.b = float(depth >> 22) / (exp2(10) - 1.0); + color.g = float((depth >> 11) & 0x00FF) / (exp2(11) - 1.0); + color.r = float(depth & 0x00FF) / (exp2(11) - 1.0); + color.a = 1.0f; +} diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag new file mode 100644 index 000000000..2a9443d3d --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + + color.r = float(depth >> 16) / (exp2(16) - 1.0); + color.g = float((depth >> 16) & 0x00FF) / (exp2(16) - 1.0); + color.b = 0.0f; + color.a = 1.0f; +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 01535d0c0..12b28aadd 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -6,6 +6,8 @@ #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" +#include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h" +#include 
"video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" @@ -358,6 +360,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), + convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), + convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { if (device.IsExtShaderStencilExportSupported()) { @@ -469,6 +473,24 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, down_shift); } +void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_b10g11r11_frag, false); + ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view, + up_scale, down_shift); +} + +void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_r16g16_frag, false); + ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view, + up_scale, down_shift); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& 
src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index f754a7294..10d24c4b7 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -62,6 +62,12 @@ public: void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + u32 up_scale, u32 down_shift); + + void ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + u32 up_scale, u32 down_shift); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); @@ -97,6 +103,8 @@ private: vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; + vk::ShaderModule convert_d24s8_to_b10g11r11_frag; + vk::ShaderModule convert_d24s8_to_r16g16_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -110,6 +118,8 @@ private: vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; + vk::Pipeline convert_d24s8_to_b10g11r11_pipeline; + vk::Pipeline convert_d24s8_to_r16g16_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 6dfd45f31..fd6064271 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -887,6 +887,16 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); } break; + case 
PixelFormat::B10G11R11_FLOAT: + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + return blit_image_helper.ConvertD24S8ToB10G11R11(dst, src_view, up_scale, down_shift); + } + break; + case PixelFormat::R16G16_UNORM: + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + return blit_image_helper.ConvertD24S8ToR16G16(dst, src_view, up_scale, down_shift); + } + break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); -- cgit v1.2.3 From 6f896d1fae3d244f83450a485d15e7cebe79abaa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 22:23:48 +0100 Subject: TextureCache: Further fixes on resolve algorithm. --- src/video_core/texture_cache/texture_cache.h | 8 ++++---- src/video_core/texture_cache/util.cpp | 25 +++++++++++++------------ 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 06257f064..4188f93c5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1096,13 +1096,13 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { continue; } - src_id = FindOrInsertImage(src_info, src_addr); - RelaxedOptions dst_options{}; + RelaxedOptions find_options{}; if (src_info.num_samples > 1) { // it's a resolve, we must enforce the same format. - dst_options = RelaxedOptions::ForceBrokenViews; + find_options = RelaxedOptions::ForceBrokenViews; } - dst_id = FindOrInsertImage(dst_info, dst_addr, dst_options); + src_id = FindOrInsertImage(src_info, src_addr, find_options); + dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); } while (has_deleted_images); return BlitImages{ .dst_id = dst_id, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 8f9eb387c..e4d82631e 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1151,19 +1151,25 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src) { + bool is_resolve = false; + const auto original_src_format = src_info.format; + const auto original_dst_format = dst_info.format; if (src) { - src_info.format = src->info.format; + if (GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } + is_resolve = src->info.num_samples > 1; src_info.num_samples = src->info.num_samples; src_info.size = src->info.size; } - if (dst) { + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { dst_info.format = dst->info.format; - dst_info.num_samples = dst->info.num_samples; - dst_info.size = dst->info.size; } if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { if (dst) { - src_info.format = dst_info.format; + if (GetFormatType(dst->info.format) == SurfaceType::ColorTexture) { + src_info.format = original_src_format; + } } else { dst_info.format = src->info.format; } @@ 
-1171,18 +1177,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { if (src) { if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { - dst_info.format = src->info.format; + dst_info.format = original_dst_format; } } else { src_info.format = dst->info.format; } } - if (src_info.num_samples > 1) { - dst_info.format = src_info.format; - } - if (dst_info.num_samples > 1) { - src_info.format = dst_info.format; - } + ASSERT(!is_resolve || dst_info.format == src_info.format); } u32 MapSizeBytes(const ImageBase& image) { -- cgit v1.2.3 From 1d5e6a51d7f66cf089d541a009c84c373fd5c6ab Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 23:22:44 +0100 Subject: TextureCache: Add B10G11R11 to D24S8 converter. --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/convert_b10g11r11_to_d24s8.frag | 19 +++++++ src/video_core/renderer_vulkan/blit_image.cpp | 62 ++++++++++++++++++---- src/video_core/renderer_vulkan/blit_image.h | 12 ++++- .../renderer_vulkan/vk_texture_cache.cpp | 3 ++ 5 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 87042195a..a2e046f12 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,6 +11,7 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag + convert_b10g11r11_to_d24s8.frag convert_d24s8_to_abgr8.frag convert_d24s8_to_b10g11r11.frag convert_d24s8_to_r16g16.frag diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag new file mode 100644 index 000000000..b7358c15c --- /dev/null +++ 
b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 +// #extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + vec4 color = texelFetch(color_texture, coord, 0).rgba; + uint depth_stencil_unorm = (uint(color.b * (exp2(10) - 1.0f)) << 22) + | (uint(color.g * (exp2(11) - 1.0f)) << 11) + | (uint(color.r * (exp2(11) - 1.0f))); + + gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); + // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 12b28aadd..e70459de5 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -5,6 +5,7 @@ #include #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_b10g11r11_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" @@ -359,6 +360,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), + convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), 
convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)), @@ -459,16 +461,25 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_abgr8_to_d24s8_frag, true); + ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_abgr8_to_d24s8_frag, true); Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } +void BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineDepthTargetEx(convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_b10g11r11_to_d24s8_frag, true); + Convert(*convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_abgr8_frag, false); + ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_abgr8_frag, false); ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } @@ -476,8 +487,8 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_b10g11r11_frag, false); + 
ConvertPipelineColorTargetEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_b10g11r11_frag, false); ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } @@ -485,8 +496,8 @@ void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_r16g16_frag, false); + ConvertPipelineColorTargetEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_r16g16_frag, false); ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } @@ -540,7 +551,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkImageView src_depth_view = src_image_view.DepthView(); const VkImageView src_stencil_view = src_image_view.StencilView(); const VkSampler sampler = *nearest_sampler; @@ -727,8 +738,37 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend }); } -void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture) { +void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *module); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType 
= VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture) { if (pipeline) { return; } diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 10d24c4b7..607964b5e 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -59,6 +59,9 @@ public: void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); @@ -83,8 +86,11 @@ private: void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); - void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture); + void ConvertPipelineColorTargetEx(vk::Pipeline& 
pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture); + + void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture); const Device& device; VKScheduler& scheduler; @@ -102,6 +108,7 @@ private: vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; + vk::ShaderModule convert_b10g11r11_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_b10g11r11_frag; vk::ShaderModule convert_d24s8_to_r16g16_frag; @@ -117,6 +124,7 @@ private: vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; + vk::Pipeline convert_b10g11r11_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline convert_d24s8_to_b10g11r11_pipeline; vk::Pipeline convert_d24s8_to_r16g16_pipeline; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index fd6064271..28a659c0e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -912,6 +912,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im src_view.format == PixelFormat::B8G8R8A8_UNORM) { return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); } + if (src_view.format == PixelFormat::B10G11R11_FLOAT) { + return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift); + } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { -- cgit v1.2.3 From e02cff2f69f9a90777f87f85f290f83fc04c16ec Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Nov 2021 00:02:12 +0100 Subject: TextureCache: Add R16G16 to D24S8 converter. 
--- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/convert_r16g16_to_d24s8.frag | 18 ++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.cpp | 11 +++++++++++ src/video_core/renderer_vulkan/blit_image.h | 5 +++++ src/video_core/renderer_vulkan/vk_texture_cache.cpp | 3 +++ 5 files changed, 38 insertions(+) create mode 100644 src/video_core/host_shaders/convert_r16g16_to_d24s8.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index a2e046f12..1c91999d7 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -17,6 +17,7 @@ set(SHADER_FILES convert_d24s8_to_r16g16.frag convert_depth_to_float.frag convert_float_to_depth.frag + convert_r16g16_to_d24s8.frag full_screen_triangle.vert fxaa.frag fxaa.vert diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag new file mode 100644 index 000000000..7b1b914f6 --- /dev/null +++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +// #extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + vec4 color = texelFetch(color_texture, coord, 0).rgba; + uint depth_stencil_unorm = (uint(color.r * (exp2(16) - 1.0f)) << 16) + | (uint(color.g * (exp2(16) - 1.0f)) << 16); + + gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); + // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index e70459de5..28b631f73 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -11,6 +11,7 @@ #include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" +#include "video_core/host_shaders/convert_r16g16_to_d24s8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" @@ -361,6 +362,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)), + convert_r16g16_to_d24s8_frag(BuildShader(device, CONVERT_R16G16_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)), @@ -476,6 +478,15 @@ void 
BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer down_shift); } +void BlitImageHelper::ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineDepthTargetEx(convert_r16g16_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_r16g16_to_d24s8_frag, true); + Convert(*convert_r16g16_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 607964b5e..cec095341 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -62,6 +62,9 @@ public: void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); @@ -109,6 +112,7 @@ private: vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; vk::ShaderModule convert_b10g11r11_to_d24s8_frag; + vk::ShaderModule convert_r16g16_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_b10g11r11_frag; vk::ShaderModule convert_d24s8_to_r16g16_frag; @@ -125,6 +129,7 @@ private: vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; vk::Pipeline convert_b10g11r11_to_d24s8_pipeline; + vk::Pipeline convert_r16g16_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline 
convert_d24s8_to_b10g11r11_pipeline; vk::Pipeline convert_d24s8_to_r16g16_pipeline; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 28a659c0e..af1a11059 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -915,6 +915,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im if (src_view.format == PixelFormat::B10G11R11_FLOAT) { return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift); } + if (src_view.format == PixelFormat::R16G16_UNORM) { + return blit_image_helper.ConvertR16G16ToD24S8(dst, src_view, up_scale, down_shift); + } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { -- cgit v1.2.3 From 0857f82913d0bcf2de4721233f74cd40ecddcdae Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Nov 2021 06:15:29 +0100 Subject: TextureCache: Implement buffer copies on Vulkan. 
--- .../renderer_opengl/gl_texture_cache.cpp | 4 +- src/video_core/renderer_opengl/gl_texture_cache.h | 7 +- .../renderer_vulkan/vk_texture_cache.cpp | 174 +++++++++++++++++++++ src/video_core/renderer_vulkan/vk_texture_cache.h | 11 +- src/video_core/texture_cache/texture_cache.h | 4 +- src/video_core/texture_cache/texture_cache_base.h | 2 - 6 files changed, 193 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6956535e5..e70bbec81 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -526,8 +526,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, } } -void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, - std::span copies) { +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, + std::span copies) { LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); format_conversion_pass.ConvertImage(dst, src, copies); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 578f8d523..ad5157d66 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -84,9 +84,13 @@ public: u64 GetDeviceLocalMemory() const; + bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { + return true; + } + void CopyImage(Image& dst, Image& src, std::span copies); - void ConvertImage(Image& dst, Image& src, std::span copies); + void ReinterpretImage(Image& dst, Image& src, std::span copies); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { UNIMPLEMENTED(); @@ -338,7 +342,6 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; static constexpr 
bool HAS_DEVICE_MEMORY_INFO = true; - static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index af1a11059..02215cfc2 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -308,6 +308,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } +[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src, + VkImageAspectFlags aspect_mask) noexcept { + return VkBufferImageCopy{ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = MakeImageSubresourceLayers( + is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask), + .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset), + .imageExtent = MakeExtent3D(copy.extent), + }; +} + [[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( std::span copies, size_t buffer_offset) { std::vector result(copies.size()); @@ -754,6 +767,167 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { return staging_buffer_pool.Request(size, MemoryUsage::Download); } +bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { + if (VideoCore::Surface::GetFormatType(dst.info.format) == + VideoCore::Surface::SurfaceType::DepthStencil) { + return !device.IsExtShaderStencilExportSupported(); + } + return false; +} + +[[nodiscard]] size_t NextPow2(size_t value) { + return static_cast(1ULL << ((8U * sizeof(size_t)) - std::countl_zero(value - 1U))); +} + +VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { + const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); + if (buffer_commits[level]) { + return *buffers[level]; + } + const auto new_size = NextPow2(needed_size); 
+ VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + buffers[level] = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = new_size, + .usage = flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + buffer_commits[level] = std::make_unique( + memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); + return *buffers[level]; +} + +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, + std::span copies) { + std::vector vk_in_copies(copies.size()); + std::vector vk_out_copies(copies.size()); + const VkImageAspectFlags src_aspect_mask = src.AspectMask(); + const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); + + std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { + return MakeBufferImageCopy(copy, true, src_aspect_mask); + }); + std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { + return MakeBufferImageCopy(copy, false, dst_aspect_mask); + }); + const u32 img_bpp = BytesPerBlock(src.info.format); + size_t total_size = 0; + for (const auto& copy : copies) { + total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; + } + const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); + const VkImage dst_image = dst.Handle(); + const VkImage src_image = src.Handle(); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, + vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { + RangedBarrierRange dst_range; + RangedBarrierRange src_range; + for (const VkBufferImageCopy& copy : vk_in_copies) { + src_range.AddLayers(copy.imageSubresource); + } + for (const 
VkBufferImageCopy& copy : vk_out_copies) { + dst_range.AddLayers(copy.imageSubresource); + } + static constexpr VkMemoryBarrier READ_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + }; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + const std::array pre_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + }, + }; + const std::array middle_in_barrier{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + }, + }; + const std::array middle_out_barrier{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + 
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + }, + }; + const std::array post_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, pre_barriers); + + cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, + vk_in_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, WRITE_BARRIER, nullptr, middle_in_barrier); + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, READ_BARRIER, {}, middle_out_barrier); + cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, {}, {}, post_barriers); + }); +} + void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const 
Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f5f8f9a74..44e9dcee4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -61,6 +61,10 @@ public: void CopyImage(Image& dst, Image& src, std::span copies); + bool ShouldReinterpret(Image& dst, Image& src); + + void ReinterpretImage(Image& dst, Image& src, std::span copies); + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); bool CanAccelerateImageUpload(Image&) const noexcept { @@ -82,6 +86,8 @@ public: return true; } + [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); + const Device& device; VKScheduler& scheduler; MemoryAllocator& memory_allocator; @@ -90,6 +96,10 @@ public: ASTCDecoderPass& astc_decoder_pass; RenderPassCache& render_pass_cache; const Settings::ResolutionScalingInfo& resolution; + + constexpr static size_t indexing_slots = 8 * sizeof(size_t); + std::array buffers{}; + std::array, indexing_slots> buffer_commits{}; }; class Image : public VideoCommon::ImageBase { @@ -316,7 +326,6 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; - static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4188f93c5..44a0d42ba 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1762,8 +1762,8 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector Date: Sat, 20 Nov 2021 06:17:01 +0100 Subject: TextureCache: Assure full conversions on depth/stencil write shaders. --- src/video_core/host_shaders/convert_abgr8_to_d24s8.frag | 4 ++-- src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag | 4 ++-- src/video_core/host_shaders/convert_r16g16_to_d24s8.frag | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag index f7657e50a..4e4ab6a26 100644 --- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -13,5 +13,5 @@ void main() { uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(color.a); + gl_FragStencilRefARB = int(color.a); } diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag index b7358c15c..2999a84cf 100644 --- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. 
#version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -15,5 +15,5 @@ void main() { | (uint(color.r * (exp2(11) - 1.0f))); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); } diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag index 7b1b914f6..3df70575e 100644 --- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -14,5 +14,5 @@ void main() { | (uint(color.g * (exp2(16) - 1.0f)) << 16); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); } -- cgit v1.2.3 From da2fe8190518d3266df7f4a48f9b651eaea84d4b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Nov 2021 14:46:19 +0100 Subject: TextureCache: Refactor and fix linux compiling. 
--- src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 7 ++----- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e70bbec81..ecb215a7d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,6 +9,7 @@ #include +#include "common/bit_util.h" #include "common/literals.h" #include "common/settings.h" #include "video_core/renderer_opengl/gl_device.h" @@ -397,9 +398,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return GL_R32UI; } -[[nodiscard]] u32 NextPow2(u32 value) { - return 1U << (32U - std::countl_zero(value - 1U)); -} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -1308,7 +1306,7 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image, const u32 copy_size = region.width * region.height * region.depth * img_bpp; if (pbo_size < copy_size) { intermediate_pbo.Create(); - pbo_size = NextPow2(copy_size); + pbo_size = Common::NextPow2(copy_size); glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY); } // Copy from source to PBO diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 02215cfc2..f194110e5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -8,6 +8,7 @@ #include #include "common/bit_cast.h" +#include "common/bit_util.h" #include "common/settings.h" #include "video_core/engines/fermi_2d.h" @@ -775,16 +776,12 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { return false; } -[[nodiscard]] size_t NextPow2(size_t value) { - return static_cast(1ULL << ((8U * sizeof(size_t)) - std::countl_zero(value - 1U))); 
-} - VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); if (buffer_commits[level]) { return *buffers[level]; } - const auto new_size = NextPow2(needed_size); + const auto new_size = Common::NextPow2(needed_size); VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; -- cgit v1.2.3 From fe1f06c856b768e9afcc9ba9ab8ef09b7152678c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 20 Nov 2021 17:48:22 -0500 Subject: Fix screenshot dimensions when at 1x scale This was regressed by ART. Prior to ART, the screenshots were saved at the title's framebuffer resolution. A misunderstanding of the existing logic led to screenshot dimensions becoming dependent on the host render window size. This changes the behavior to match how it was prior to ART at 1x, with screenshots now always being the title's framebuffer dimensions scaled by the resolution scaling factor. --- src/video_core/video_core.cpp | 6 ------ src/video_core/video_core.h | 2 -- 2 files changed, 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index e852c817e..329bf4def 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -55,10 +55,4 @@ std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor } } -float GetResolutionScaleFactor(const RendererBase& renderer) { - return Settings::values.resolution_info.active - ? 
Settings::values.resolution_info.up_factor - : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio(); -} - } // namespace VideoCore diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f86877e86..084df641f 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -25,6 +25,4 @@ class RendererBase; /// Creates an emulated GPU instance using the given system context. std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); -float GetResolutionScaleFactor(const RendererBase& renderer); - } // namespace VideoCore -- cgit v1.2.3 From 095bc88428a0c744136969441e4763ddb5c697a6 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 20 Nov 2021 21:18:37 -0500 Subject: vk_blit_image: Consolidate CreatePipelineTargetEx functions --- src/video_core/renderer_vulkan/blit_image.cpp | 38 +++++++-------------------- src/video_core/renderer_vulkan/blit_image.h | 3 +++ 2 files changed, 13 insertions(+), 28 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 28b631f73..a63d4d222 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -749,8 +749,9 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend }); } -void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture) { +void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool is_target_depth, + bool single_texture) { if (pipeline) { return; } @@ -767,7 +768,7 @@ void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRen .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = nullptr, + .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout, @@ -778,33 +779,14 @@ void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRen }); } +void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture) { + ConvertPipelineEx(pipeline, renderpass, module, false, single_texture); +} + void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, vk::ShaderModule& module, bool single_texture) { - if (pipeline) { - return; - } - const std::array stages = MakeStages(*full_screen_vert, *module); - pipeline = device.GetLogical().CreateGraphicsPipeline({ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(stages.size()), - .pStages = stages.data(), - .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pTessellationState = nullptr, - .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, - .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .layout = single_texture ? 
*one_texture_pipeline_layout : *two_textures_pipeline_layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }); + ConvertPipelineEx(pipeline, renderpass, module, true, single_texture); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index cec095341..3455c75f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -89,6 +89,9 @@ private: void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool is_target_depth, bool single_texture); + void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, vk::ShaderModule& module, bool single_texture); -- cgit v1.2.3 From a41c6dafea6e00d674cfcf3e1b6576fa208acf81 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 20 Nov 2021 21:49:37 -0500 Subject: vk_texture_cache: Mark VkBufferUsageFlags as static constexpr --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 3964424af..c72f0c897 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -787,9 +787,9 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { return *buffers[level]; } const auto new_size = Common::NextPow2(needed_size); - VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + static constexpr VkBufferUsageFlags flags = + 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; buffers[level] = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, -- cgit v1.2.3 From 779f4ac72d2ea2788c2106c8d2d1ec0e01b77b81 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 21 Nov 2021 05:32:34 +0100 Subject: TextureCache: Eliminate format deduction as full depth conversion has been supported. --- src/video_core/texture_cache/texture_cache.h | 6 ++---- src/video_core/texture_cache/util.cpp | 28 +++------------------------- 2 files changed, 5 insertions(+), 29 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 44a0d42ba..0e4907c53 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1079,7 +1079,7 @@ ImageId TextureCache

<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA template <class P> typename TextureCache<P>::BlitImages TextureCache<P>
::GetBlitImages( const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { - static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples; const GPUVAddr dst_addr = dst.Address(); const GPUVAddr src_addr = src.Address(); ImageInfo dst_info(dst); @@ -1093,9 +1093,7 @@ typename TextureCache

<P>::BlitImages TextureCache<P>
::GetBlitImages( const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; DeduceBlitImages(dst_info, src_info, dst_image, src_image); - if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { - continue; - } + ASSERT(GetFormatType(dst_info.format) == GetFormatType(src_info.format)); RelaxedOptions find_options{}; if (src_info.num_samples > 1) { // it's a resolve, we must enforce the same format. diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index e4d82631e..777503488 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1152,36 +1152,14 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src) { bool is_resolve = false; - const auto original_src_format = src_info.format; - const auto original_dst_format = dst_info.format; if (src) { - if (GetFormatType(src->info.format) != SurfaceType::ColorTexture) { - src_info.format = src->info.format; - } is_resolve = src->info.num_samples > 1; src_info.num_samples = src->info.num_samples; src_info.size = src->info.size; } - if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { - dst_info.format = dst->info.format; - } - if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { - if (dst) { - if (GetFormatType(dst->info.format) == SurfaceType::ColorTexture) { - src_info.format = original_src_format; - } - } else { - dst_info.format = src->info.format; - } - } - if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { - if (src) { - if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { - dst_info.format = original_dst_format; - } - } else { - src_info.format = dst->info.format; - } + if (dst) { + dst_info.num_samples = 
dst->info.num_samples; + dst_info.size = dst->info.size; } ASSERT(!is_resolve || dst_info.format == src_info.format); } -- cgit v1.2.3 From b96caf200d047b81554c3839c7a6a7c35b251944 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 21 Nov 2021 20:52:39 +0100 Subject: HostShaders: Fix D24S8 convertion shaders. --- .../host_shaders/convert_abgr8_to_d24s8.frag | 7 ++++--- .../host_shaders/convert_b10g11r11_to_d24s8.frag | 18 +++++++++++++----- .../host_shaders/convert_d24s8_to_abgr8.frag | 10 ++++++---- .../host_shaders/convert_d24s8_to_b10g11r11.frag | 19 +++++++++++++++---- .../host_shaders/convert_d24s8_to_r16g16.frag | 7 ++++--- .../host_shaders/convert_r16g16_to_d24s8.frag | 9 +++++---- 6 files changed, 47 insertions(+), 23 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag index 4e4ab6a26..d51397a0c 100644 --- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -10,8 +10,9 @@ layout(binding = 0) uniform sampler2D color_texture; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); uvec4 color = uvec4(texelFetch(color_texture, coord, 0).rgba * (exp2(8) - 1.0f)); - uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; + uvec4 bytes = color << uvec4(24, 16, 8, 0); + uint depth_stencil_unorm = bytes.x | bytes.y | bytes.z | bytes.w; - gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(color.a); + gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); + gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); } diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag index 2999a84cf..11bdd861d 100644 --- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag @@ 
-7,13 +7,21 @@ layout(binding = 0) uniform sampler2D color_texture; +uint conv_from_float(float value_f, uint mantissa_bits) { + uint value = floatBitsToInt(value_f); + uint exp = (value >> 23) & 0x1Fu; + uint mantissa_shift = 32u - mantissa_bits; + uint mantissa = (value << 9u) >> mantissa_shift; + return (exp << mantissa_bits) | mantissa; +} + void main() { ivec2 coord = ivec2(gl_FragCoord.xy); vec4 color = texelFetch(color_texture, coord, 0).rgba; - uint depth_stencil_unorm = (uint(color.b * (exp2(10) - 1.0f)) << 22) - | (uint(color.g * (exp2(11) - 1.0f)) << 11) - | (uint(color.r * (exp2(11) - 1.0f))); + uint depth_stencil_unorm = (conv_from_float(color.r, 6u) << 21) + | (conv_from_float(color.g, 6u) << 10) + | conv_from_float(color.b, 5u); - gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); + gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); } diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag index ff3bf8209..47f9c1abc 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -14,8 +14,10 @@ void main() { uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - color.r = float(depth >> 16) / (exp2(8) - 1.0); - color.g = float((depth >> 8) & 0x00FF) / (exp2(8) - 1.0); - color.b = float(depth & 0x00FF) / (exp2(8) - 1.0); - color.a = float(stencil) / (exp2(8) - 1.0); + highp uint depth_val = + uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); + lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; + highp uvec4 components = + uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); + color = vec4(components) / (exp2(8.0) - 1.0); } diff 
--git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag index c743d3a13..c2d935fcd 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag @@ -9,13 +9,24 @@ layout(binding = 1) uniform isampler2D stencil_tex; layout(location = 0) out vec4 color; +float conv_to_float(uint value, uint mantissa_bits) { + uint exp = (value >> mantissa_bits) & 0x1Fu; + uint mantissa_shift = 32u - mantissa_bits; + uint mantissa = (value << mantissa_shift) >> mantissa_shift; + return uintBitsToFloat((exp << 23) | (mantissa << (23 - mantissa_bits))); +} + void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0f)); uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + uint depth_stencil = (stencil << 24) | (depth >> 8); + uint red_int = (depth_stencil >> 21) & 0x07FF; + uint green_int = (depth_stencil >> 10) & 0x07FF; + uint blue_int = depth_stencil & 0x03FF; - color.b = float(depth >> 22) / (exp2(10) - 1.0); - color.g = float((depth >> 11) & 0x00FF) / (exp2(11) - 1.0); - color.r = float(depth & 0x00FF) / (exp2(11) - 1.0); + color.r = conv_to_float(red_int, 6u); + color.g = conv_to_float(green_int, 6u); + color.b = conv_to_float(blue_int, 5u); color.a = 1.0f; } diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag index 2a9443d3d..c48a7ac66 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag @@ -11,11 +11,12 @@ layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint depth = uint(textureLod(depth_tex, coord, 0).r * 
(exp2(32.0) - 1.0f)); uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + uint depth_stencil = (stencil << 24) | (depth >> 8); - color.r = float(depth >> 16) / (exp2(16) - 1.0); - color.g = float((depth >> 16) & 0x00FF) / (exp2(16) - 1.0); + color.r = float(depth_stencil & 0x0000FFFFu) / (exp2(16) - 1.0); + color.g = float(depth_stencil >> 16) / (exp2(16) - 1.0); color.b = 0.0f; color.a = 1.0f; } diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag index 3df70575e..beb2d1284 100644 --- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag @@ -10,9 +10,10 @@ layout(binding = 0) uniform sampler2D color_texture; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); vec4 color = texelFetch(color_texture, coord, 0).rgba; - uint depth_stencil_unorm = (uint(color.r * (exp2(16) - 1.0f)) << 16) - | (uint(color.g * (exp2(16) - 1.0f)) << 16); + uvec2 bytes = uvec2(color.rg * (exp2(16) - 1.0f)) << uvec2(0, 16); + uint depth_stencil_unorm = + uint(color.r * (exp2(16) - 1.0f)) | (uint(color.g * (exp2(16) - 1.0f)) << 16); - gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); + gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); } -- cgit v1.2.3 From d7f4434bd534d53e8aea293e39629bf8ca8ee123 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 21 Nov 2021 21:09:49 +0100 Subject: VulkanTexturECache: Use reinterpret on D32_S8 formats. 
--- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 3964424af..e1ba1bdaf 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -775,8 +775,13 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { if (VideoCore::Surface::GetFormatType(dst.info.format) == - VideoCore::Surface::SurfaceType::DepthStencil) { - return !device.IsExtShaderStencilExportSupported(); + VideoCore::Surface::SurfaceType::DepthStencil && + !device.IsExtShaderStencilExportSupported()) { + return true; + } + if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT || + src.info.format == PixelFormat::D32_FLOAT_S8_UINT) { + return true; } return false; } -- cgit v1.2.3 From 853284943901560081f6ff992b6c04b7c33f0d21 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 22 Nov 2021 00:00:01 +0100 Subject: TextureCache: Simplify blitting of D24S8 formats and fix bugs. 
--- src/video_core/host_shaders/CMakeLists.txt | 4 - .../host_shaders/convert_b10g11r11_to_d24s8.frag | 27 ------ .../host_shaders/convert_d24s8_to_b10g11r11.frag | 32 ------- .../host_shaders/convert_d24s8_to_r16g16.frag | 22 ----- .../host_shaders/convert_r16g16_to_d24s8.frag | 19 ----- src/video_core/renderer_vulkan/blit_image.cpp | 98 +++++++++++----------- src/video_core/renderer_vulkan/blit_image.h | 25 +----- .../renderer_vulkan/vk_texture_cache.cpp | 30 ++----- src/video_core/renderer_vulkan/vk_texture_cache.h | 3 + src/video_core/texture_cache/texture_cache.h | 8 +- 10 files changed, 73 insertions(+), 195 deletions(-) delete mode 100644 src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag delete mode 100644 src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag delete mode 100644 src/video_core/host_shaders/convert_d24s8_to_r16g16.frag delete mode 100644 src/video_core/host_shaders/convert_r16g16_to_d24s8.frag (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 1c91999d7..fd3e41434 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,13 +11,9 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag - convert_b10g11r11_to_d24s8.frag convert_d24s8_to_abgr8.frag - convert_d24s8_to_b10g11r11.frag - convert_d24s8_to_r16g16.frag convert_depth_to_float.frag convert_float_to_depth.frag - convert_r16g16_to_d24s8.frag full_screen_triangle.vert fxaa.frag fxaa.vert diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag deleted file mode 100644 index 11bdd861d..000000000 --- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file 
included. - -#version 450 -#extension GL_ARB_shader_stencil_export : require - -layout(binding = 0) uniform sampler2D color_texture; - -uint conv_from_float(float value_f, uint mantissa_bits) { - uint value = floatBitsToInt(value_f); - uint exp = (value >> 23) & 0x1Fu; - uint mantissa_shift = 32u - mantissa_bits; - uint mantissa = (value << 9u) >> mantissa_shift; - return (exp << mantissa_bits) | mantissa; -} - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - vec4 color = texelFetch(color_texture, coord, 0).rgba; - uint depth_stencil_unorm = (conv_from_float(color.r, 6u) << 21) - | (conv_from_float(color.g, 6u) << 10) - | conv_from_float(color.b, 5u); - - gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); -} diff --git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag deleted file mode 100644 index c2d935fcd..000000000 --- a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#version 450 - -layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; - -layout(location = 0) out vec4 color; - -float conv_to_float(uint value, uint mantissa_bits) { - uint exp = (value >> mantissa_bits) & 0x1Fu; - uint mantissa_shift = 32u - mantissa_bits; - uint mantissa = (value << mantissa_shift) >> mantissa_shift; - return uintBitsToFloat((exp << 23) | (mantissa << (23 - mantissa_bits))); -} - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - uint depth_stencil = (stencil << 24) | (depth >> 8); - uint red_int = (depth_stencil >> 21) & 0x07FF; - uint green_int = (depth_stencil >> 10) & 0x07FF; - uint blue_int = depth_stencil & 0x03FF; - - color.r = conv_to_float(red_int, 6u); - color.g = conv_to_float(green_int, 6u); - color.b = conv_to_float(blue_int, 5u); - color.a = 1.0f; -} diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag deleted file mode 100644 index c48a7ac66..000000000 --- a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#version 450 - -layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; - -layout(location = 0) out vec4 color; - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - uint depth_stencil = (stencil << 24) | (depth >> 8); - - color.r = float(depth_stencil & 0x0000FFFFu) / (exp2(16) - 1.0); - color.g = float(depth_stencil >> 16) / (exp2(16) - 1.0); - color.b = 0.0f; - color.a = 1.0f; -} diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag deleted file mode 100644 index beb2d1284..000000000 --- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 450 -#extension GL_ARB_shader_stencil_export : require - -layout(binding = 0) uniform sampler2D color_texture; - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - vec4 color = texelFetch(color_texture, coord, 0).rgba; - uvec2 bytes = uvec2(color.rg * (exp2(16) - 1.0f)) << uvec2(0, 16); - uint depth_stencil_unorm = - uint(color.r * (exp2(16) - 1.0f)) | (uint(color.g * (exp2(16) - 1.0f)) << 16); - - gl_FragDepth = float(depth_stencil_unorm & 0x00FFFFFFu) / (exp2(24.0) - 1.0f); - gl_FragStencilRefARB = int(depth_stencil_unorm >> 24); -} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 28b631f73..2e69e270f 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -5,13 +5,9 @@ #include #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" -#include "video_core/host_shaders/convert_b10g11r11_to_d24s8_frag_spv.h" #include 
"video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" -#include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h" -#include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" -#include "video_core/host_shaders/convert_r16g16_to_d24s8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" @@ -361,11 +357,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), - convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)), - convert_r16g16_to_d24s8_frag(BuildShader(device, CONVERT_R16G16_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), - convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), - convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { if (device.IsExtShaderStencilExportSupported()) { @@ -461,30 +453,11 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, } void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + ImageView& src_image_view, u32 up_scale, u32 down_shift) { ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, 
dst_framebuffer->RenderPass(), convert_abgr8_to_d24s8_frag, true); - Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); -} - -void BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineDepthTargetEx(convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_b10g11r11_to_d24s8_frag, true); - Convert(*convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); -} - -void BlitImageHelper::ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineDepthTargetEx(convert_r16g16_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_r16g16_to_d24s8_frag, true); - Convert(*convert_r16g16_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); + ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); } void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, @@ -495,24 +468,6 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, down_shift); } -void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineColorTargetEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_b10g11r11_frag, false); - ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view, - up_scale, down_shift); -} - -void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, - u32 down_shift) { - ConvertPipelineColorTargetEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_r16g16_frag, false); - 
ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view, - up_scale, down_shift); -} - void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -560,6 +515,53 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb scheduler.InvalidateState(); } +void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.ColorView(); + const VkSampler sampler = *nearest_sampler; + const VkExtent2D extent{ + .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), + .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), + }; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, + this](vk::CommandBuffer cmdbuf) { + const VkOffset2D offset{ + .x = 0, + .y = 0, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 0.0f, + }; + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const PushConstants push_constants{ + .tex_scale = {viewport.width, viewport.height}, + .tex_offset = {0.0f, 0.0f}, + }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + + // TODO: Barriers + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + 
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *two_textures_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index cec095341..0b73cf444 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -56,28 +56,19 @@ public: void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); - void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); - void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, u32 down_shift); - - void ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); - void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); - void ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); - - void ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); - private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, ImageView& src_image_view, 
u32 up_scale, u32 down_shift); @@ -111,11 +102,7 @@ private: vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; - vk::ShaderModule convert_b10g11r11_to_d24s8_frag; - vk::ShaderModule convert_r16g16_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; - vk::ShaderModule convert_d24s8_to_b10g11r11_frag; - vk::ShaderModule convert_d24s8_to_r16g16_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -128,11 +115,7 @@ private: vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; - vk::Pipeline convert_b10g11r11_to_d24s8_pipeline; - vk::Pipeline convert_r16g16_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; - vk::Pipeline convert_d24s8_to_b10g11r11_pipeline; - vk::Pipeline convert_d24s8_to_r16g16_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e1ba1bdaf..ef8ae6cb6 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1063,21 +1063,10 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::A8B8G8R8_UNORM: - case PixelFormat::B8G8R8A8_UNORM: if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); } break; - case PixelFormat::B10G11R11_FLOAT: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { - return blit_image_helper.ConvertD24S8ToB10G11R11(dst, src_view, up_scale, down_shift); - } - break; - case PixelFormat::R16G16_UNORM: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { - return blit_image_helper.ConvertD24S8ToR16G16(dst, src_view, up_scale, down_shift); - } - break; case PixelFormat::R32_FLOAT: if (src_view.format == 
PixelFormat::D32_FLOAT) { return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); @@ -1089,16 +1078,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::S8_UINT_D24_UNORM: - if (src_view.format == PixelFormat::A8B8G8R8_UNORM || - src_view.format == PixelFormat::B8G8R8A8_UNORM) { - return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); - } - if (src_view.format == PixelFormat::B10G11R11_FLOAT) { - return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift); - } - if (src_view.format == PixelFormat::R16G16_UNORM) { - return blit_image_helper.ConvertR16G16ToD24S8(dst, src_view, up_scale, down_shift); - } + return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { @@ -1595,6 +1575,14 @@ VkImageView ImageView::StencilView() { return *stencil_view; } +VkImageView ImageView::ColorView() { + if (color_view) { + return *color_view; + } + color_view = MakeView(VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT); + return *color_view; +} + VkImageView ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { if (image_format == Shader::ImageFormat::Typeless) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 44e9dcee4..753e3e8a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -184,6 +184,8 @@ public: [[nodiscard]] VkImageView StencilView(); + [[nodiscard]] VkImageView ColorView(); + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); @@ -224,6 +226,7 @@ private: std::unique_ptr storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; + vk::ImageView color_view; VkImage image_handle = 
VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0e4907c53..9548abec8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1781,7 +1781,13 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector Date: Mon, 22 Nov 2021 00:06:56 +0100 Subject: Texture Cache: Always copy on NVIDIA. --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ef8ae6cb6..51246d46f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -779,6 +779,11 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { !device.IsExtShaderStencilExportSupported()) { return true; } + if (VideoCore::Surface::GetFormatType(src.info.format) == + VideoCore::Surface::SurfaceType::DepthStencil && + !device.IsExtShaderStencilExportSupported()) { + return true; + } if (dst.info.format == PixelFormat::D32_FLOAT_S8_UINT || src.info.format == PixelFormat::D32_FLOAT_S8_UINT) { return true; -- cgit v1.2.3 From 1e474fb9d1c5e35c2bb8822d58556ef3358e66e9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 22 Nov 2021 00:21:42 +0100 Subject: Texture Cache: Correct conversion shaders. 
--- src/video_core/host_shaders/convert_abgr8_to_d24s8.frag | 2 +- src/video_core/host_shaders/convert_d24s8_to_abgr8.frag | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag index d51397a0c..ea055ddad 100644 --- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -9,7 +9,7 @@ layout(binding = 0) uniform sampler2D color_texture; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uvec4 color = uvec4(texelFetch(color_texture, coord, 0).rgba * (exp2(8) - 1.0f)); + uvec4 color = uvec4(texelFetch(color_texture, coord, 0).abgr * (exp2(8) - 1.0f)); uvec4 bytes = color << uvec4(24, 16, 8, 0); uint depth_stencil_unorm = bytes.x | bytes.y | bytes.z | bytes.w; diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag index 47f9c1abc..94368fb59 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -19,5 +19,5 @@ void main() { lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; highp uvec4 components = uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); - color = vec4(components) / (exp2(8.0) - 1.0); + color.abgr = vec4(components) / (exp2(8.0) - 1.0); } -- cgit v1.2.3 From 08674aee87e385e5f2a3b8e1b9aa85a61e23a490 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 22 Nov 2021 06:07:21 +0100 Subject: Texture Cache: Fix issue with blitting 3D textures. 
--- src/video_core/texture_cache/util.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 777503488..9b1613008 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1155,11 +1155,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* if (src) { is_resolve = src->info.num_samples > 1; src_info.num_samples = src->info.num_samples; - src_info.size = src->info.size; + src_info.size.width = src->info.size.width; + src_info.size.height = src->info.size.height; } if (dst) { dst_info.num_samples = dst->info.num_samples; - dst_info.size = dst->info.size; + dst_info.size.width = dst->info.size.width; + dst_info.size.height = dst->info.size.height; } ASSERT(!is_resolve || dst_info.format == src_info.format); } -- cgit v1.2.3 From 72aa418b0b412855683633d2799da1eb190ab6d5 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Wed, 24 Nov 2021 17:23:57 -0700 Subject: video_core/codecs: fix multiple decoding issues on Linux ... 
* when someone installed Intel video drivers on an AMD system, the decoder will select the Intel VA-API decoding driver and yuzu will crash due to incorrect driver selection; the fix will check if the currently about-to-use driver is loaded in the kernel * when using NVIDIA driver on Linux with a ffmpeg that does not have CUDA capability enabled, the decoder will crash; the fix simply making the decoder prefers the VDPAU driver over CUDA on Linux --- src/video_core/command_classes/codecs/codec.cpp | 49 ++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 916277811..403ce30fe 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include "common/assert.h" @@ -59,6 +60,36 @@ Codec::~Codec() { av_buffer_unref(&av_gpu_decoder); } +#ifdef LIBVA_FOUND +// List all the currently loaded Linux modules +static std::vector ListLinuxKernelModules() { + std::vector modules{}; + auto module_listing = fopen("/proc/modules", "rt"); + char* buffer = nullptr; + size_t buf_len = 0; + if (!module_listing) { + LOG_WARNING(Service_NVDRV, "Could not open /proc/modules to collect available modules"); + return modules; + } + while (getline(&buffer, &buf_len, module_listing) != -1) { + // format for the module listing file (sysfs) + // + auto line = std::string(buffer); + // we are only interested in module names + auto name_pos = line.find_first_of(" "); + if (name_pos == std::string::npos) { + continue; + } + modules.push_back(line.erase(name_pos + 1)); + } + if (buffer) { + free(buffer); + } + fclose(module_listing); + return modules; +} +#endif + bool Codec::CreateGpuAvDevice() { #if defined(LIBVA_FOUND) static constexpr std::array 
VAAPI_DRIVERS = { @@ -67,8 +98,21 @@ bool Codec::CreateGpuAvDevice() { "amdgpu", }; AVDictionary* hwdevice_options = nullptr; + auto loaded_modules = ListLinuxKernelModules(); av_dict_set(&hwdevice_options, "connection_type", "drm", 0); for (const auto& driver : VAAPI_DRIVERS) { + bool found = false; + // first check if the target driver is loaded in the kernel + for (const auto& module : loaded_modules) { + if (module == driver) { + found = true; + break; + } + } + if (!found) { + LOG_DEBUG(Service_NVDRV, "Kernel driver {} is not loaded, trying the next one", driver); + continue; + } av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, nullptr, hwdevice_options, 0); @@ -85,11 +129,12 @@ bool Codec::CreateGpuAvDevice() { #endif static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; static constexpr std::array GPU_DECODER_TYPES{ +#ifdef linux + AV_HWDEVICE_TYPE_VDPAU, +#endif AV_HWDEVICE_TYPE_CUDA, #ifdef _WIN32 AV_HWDEVICE_TYPE_D3D11VA, -#else - AV_HWDEVICE_TYPE_VDPAU, #endif }; for (const auto& type : GPU_DECODER_TYPES) { -- cgit v1.2.3 From 60928cf8cd0d6f46826d588926969913d7fc6740 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Wed, 24 Nov 2021 18:00:55 -0700 Subject: video_core/codec: address comments --- src/video_core/command_classes/codecs/codec.cpp | 28 ++++++++++--------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 403ce30fe..02d309170 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include #include #include #include @@ -63,15 +64,16 @@ Codec::~Codec() { #ifdef LIBVA_FOUND // List all the currently loaded Linux modules static std::vector ListLinuxKernelModules() { + using FILEPtr = std::unique_ptr; + auto module_listing = FILEPtr{fopen("/proc/modules", "rt"), std::fclose}; std::vector modules{}; - auto module_listing = fopen("/proc/modules", "rt"); - char* buffer = nullptr; - size_t buf_len = 0; if (!module_listing) { LOG_WARNING(Service_NVDRV, "Could not open /proc/modules to collect available modules"); return modules; } - while (getline(&buffer, &buf_len, module_listing) != -1) { + char* buffer = nullptr; + size_t buf_len = 0; + while (getline(&buffer, &buf_len, module_listing.get()) != -1) { // format for the module listing file (sysfs) // auto line = std::string(buffer); @@ -80,12 +82,9 @@ static std::vector ListLinuxKernelModules() { if (name_pos == std::string::npos) { continue; } - modules.push_back(line.erase(name_pos + 1)); + modules.push_back(line.erase(name_pos)); } - if (buffer) { - free(buffer); - } - fclose(module_listing); + free(buffer); return modules; } #endif @@ -98,17 +97,12 @@ bool Codec::CreateGpuAvDevice() { "amdgpu", }; AVDictionary* hwdevice_options = nullptr; - auto loaded_modules = ListLinuxKernelModules(); + const auto loaded_modules = ListLinuxKernelModules(); av_dict_set(&hwdevice_options, "connection_type", "drm", 0); for (const auto& driver : VAAPI_DRIVERS) { - bool found = false; // first check if the target driver is loaded in the kernel - for (const auto& module : loaded_modules) { - if (module == driver) { - found = true; - break; - } - } + bool found = std::any_of(loaded_modules.begin(), loaded_modules.end(), + [&driver](const auto& module) { return module == driver; }); if (!found) { LOG_DEBUG(Service_NVDRV, "Kernel driver {} is not loaded, trying the next one", driver); continue; -- cgit v1.2.3 From 1624f307d0ebd68751b567f6a616f635567754fa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 26 
Nov 2021 17:03:48 +0100 Subject: Texture Cache: Further fix regressions. --- src/video_core/texture_cache/texture_cache.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9548abec8..570da2b04 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1088,19 +1088,23 @@ typename TextureCache

<P>::BlitImages TextureCache<P>
::GetBlitImages( ImageId src_id; do { has_deleted_images = false; - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); src_id = FindImage(src_info, src_addr, FIND_OPTIONS); - const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; - DeduceBlitImages(dst_info, src_info, dst_image, src_image); - ASSERT(GetFormatType(dst_info.format) == GetFormatType(src_info.format)); - RelaxedOptions find_options{}; - if (src_info.num_samples > 1) { - // it's a resolve, we must enforce the same format. - find_options = RelaxedOptions::ForceBrokenViews; - } - src_id = FindOrInsertImage(src_info, src_addr, find_options); - dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); + if (src_image && src_image->info.num_samples > 1) { + RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; + src_id = FindOrInsertImage(src_info, src_addr, find_options); + dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); + if (has_deleted_images) { + continue; + } + } + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } } while (has_deleted_images); return BlitImages{ .dst_id = dst_id, -- cgit v1.2.3 From ecefc932e64bf4ab8442d3c9808a2e54429e7001 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 26 Nov 2021 21:36:53 +0100 Subject: Texture Cache: Redesigning the blitting system (again). 
--- src/video_core/texture_cache/texture_cache.h | 52 +++++++++++++++++++---- src/video_core/texture_cache/texture_cache_base.h | 3 +- src/video_core/texture_cache/util.cpp | 32 ++++++++------ 3 files changed, 64 insertions(+), 23 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 570da2b04..f24de9a38 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -472,7 +472,7 @@ template void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src); + const BlitImages images = GetBlitImages(dst, src, copy); const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; @@ -762,12 +762,15 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, const bool broken_views = runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews); const bool native_bgr = runtime.HasNativeBgr(); - ImageId image_id; + const bool flexible_formats = True(options & RelaxedOptions::Format); + ImageId image_id{}; + boost::container::small_vector image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; } - if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) + [[unlikely]] { const bool strict_size = False(options & RelaxedOptions::Size) && True(existing_image.flags & ImageFlagBits::Strong); const ImageInfo& existing = existing_image.info; @@ -776,17 +779,27 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, IsPitchLinearSameSize(existing, info, strict_size) && IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { image_id = existing_image_id; - return true; + image_ids.push_back(existing_image_id); + return !flexible_formats && existing.format == info.format; } } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, native_bgr)) { image_id = existing_image_id; - return true; + image_ids.push_back(existing_image_id); + return !flexible_formats && existing_image.info.format == info.format; } return false; }; ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); - return image_id; + if (image_ids.size() <= 1) [[likely]] { + return image_id; + } + auto image_ids_compare = [this](ImageId a, ImageId b) { + auto& image_a = slot_images[a]; + auto& image_b = slot_images[b]; + return image_a.modification_tick < image_b.modification_tick; + }; + return *std::ranges::max_element(image_ids, image_ids_compare); } template @@ -1078,17 +1091,26 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA template typename TextureCache

::BlitImages TextureCache

::GetBlitImages( - const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Samples; const GPUVAddr dst_addr = dst.Address(); const GPUVAddr src_addr = src.Address(); ImageInfo dst_info(dst); ImageInfo src_info(src); + const bool can_be_depth_blit = + dst_info.format == src_info.format && copy.filter == Tegra::Engines::Fermi2D::Filter::Point; ImageId dst_id; ImageId src_id; + RelaxedOptions try_options = FIND_OPTIONS; + if (can_be_depth_blit) { + try_options |= RelaxedOptions::Format; + } do { has_deleted_images = false; - src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, try_options); + dst_id = FindImage(dst_info, dst_addr, try_options); const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; if (src_image && src_image->info.num_samples > 1) { RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; @@ -1097,8 +1119,15 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( if (has_deleted_images) { continue; } + break; + } + if (can_be_depth_blit) { + const ImageBase* const dst_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } } - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); if (!src_id) { src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); } @@ -1106,6 +1135,11 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); } } while (has_deleted_images); + if (GetFormatType(dst_info.format) != SurfaceType::ColorTexture) { + // Make sure the images are depth and/or stencil textures. + src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); + dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); + } return BlitImages{ .dst_id = dst_id, .src_id = src_id, diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 643ad811c..7107887a6 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -252,7 +252,8 @@ private: /// Return a blit image pair from the given guest blit parameters [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src); + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); /// Find or create a sampler from a guest descriptor sampler [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 9b1613008..7bd31b211 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1151,19 +1151,25 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src) { - bool is_resolve = false; - if (src) { - is_resolve = src->info.num_samples > 1; - src_info.num_samples = src->info.num_samples; - src_info.size.width = src->info.size.width; - src_info.size.height = src->info.size.height; - } - if (dst) { - dst_info.num_samples = dst->info.num_samples; - dst_info.size.width = dst->info.size.width; - dst_info.size.height = dst->info.size.height; - } - ASSERT(!is_resolve || dst_info.format == 
src_info.format); + const auto original_dst_format = dst_info.format; + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + dst_info.format = dst->info.format; + } + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + dst_info.format = src->info.format; + } + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + if (src) { + if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { + dst_info.format = original_dst_format; + } + } else { + src_info.format = dst->info.format; + } + } } u32 MapSizeBytes(const ImageBase& image) { -- cgit v1.2.3 From 5a3463bc2b1489dda6b5fe90110f9260f6b68463 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 27 Nov 2021 23:49:56 +0100 Subject: Texture Cache: Secure insertions against deletions. --- src/video_core/texture_cache/texture_cache.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f24de9a38..565b99254 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1137,8 +1137,11 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( } while (has_deleted_images); if (GetFormatType(dst_info.format) != SurfaceType::ColorTexture) { // Make sure the images are depth and/or stencil textures. - src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); - dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); + do { + has_deleted_images = false; + src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); + dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); + } while (has_deleted_images); } return BlitImages{ .dst_id = dst_id, @@ -1196,7 +1199,14 @@ template ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, bool is_clear) { const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; - const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + ImageId image_id{}; + bool delete_state = has_deleted_images; + do { + has_deleted_images = false; + image_id = FindOrInsertImage(info, gpu_addr, options); + delete_state |= has_deleted_images; + } while (has_deleted_images); + has_deleted_images = delete_state; if (!image_id) { return NULL_IMAGE_VIEW_ID; } -- cgit v1.2.3 From 524a9baa7ea33125d5e6ba48f277c81fb7a612e3 Mon Sep 17 00:00:00 2001 From: Feng Chen Date: Mon, 29 Nov 2021 12:39:37 +0800 Subject: Add missing pixel format mapping --- src/video_core/texture_cache/format_lookup_table.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index ddfb726fe..afa807d5d 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -139,6 +139,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::D16_UNORM; case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): return PixelFormat::S8_UINT_D24_UNORM; case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): -- cgit v1.2.3 From 2c47f8aa1886522898b5b3a73185b5662be3e9f3 Mon Sep 17 00:00:00 2001 From: Feng Chen Date: Thu, 2 Dec 2021 12:19:43 +0800 Subject: Support multiple videos playing --- src/video_core/gpu.cpp | 43 +++++++++++++------------------------------ src/video_core/gpu.h | 4 ++-- 2 files changed, 15 insertions(+), 32 deletions(-) (limited to 
'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ab7c21a49..27a47954d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -185,16 +185,6 @@ struct GPU::Impl { return *dma_pusher; } - /// Returns a reference to the GPU CDMA pusher. - [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() { - return *cdma_pusher; - } - - /// Returns a const reference to the GPU CDMA pusher. - [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const { - return *cdma_pusher; - } - /// Returns a reference to the underlying renderer. [[nodiscard]] VideoCore::RendererBase& Renderer() { return *renderer; @@ -338,25 +328,26 @@ struct GPU::Impl { } /// Push GPU command buffer entries to be processed - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { if (!use_nvdec) { return; } - if (!cdma_pusher) { - cdma_pusher = std::make_unique(gpu); + if (cdma_pushers.find(id) == cdma_pushers.end()) { + cdma_pushers[id] = std::make_unique(gpu); } // SubmitCommandBuffer would make the nvdec operations async, this is not currently working // TODO(ameerj): RE proper async nvdec operation // gpu_thread.SubmitCommandBuffer(std::move(entries)); - - cdma_pusher->ProcessEntries(std::move(entries)); + cdma_pushers[id]->ProcessEntries(std::move(entries)); } /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance() { - cdma_pusher.reset(); + void ClearCdmaInstance(u32 id) { + if (cdma_pushers.find(id) != cdma_pushers.end()) { + cdma_pushers.erase(id); + } } /// Swap buffers (render frame) @@ -659,7 +650,7 @@ struct GPU::Impl { Core::System& system; std::unique_ptr memory_manager; std::unique_ptr dma_pusher; - std::unique_ptr cdma_pusher; + std::map> cdma_pushers; std::unique_ptr renderer; VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; @@ -811,14 +802,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const { return impl->DmaPusher(); } 
-Tegra::CDmaPusher& GPU::CDmaPusher() { - return impl->CDmaPusher(); -} - -const Tegra::CDmaPusher& GPU::CDmaPusher() const { - return impl->CDmaPusher(); -} - VideoCore::RendererBase& GPU::Renderer() { return impl->Renderer(); } @@ -887,12 +870,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) { impl->PushGPUEntries(std::move(entries)); } -void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { - impl->PushCommandBuffer(entries); +void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { + impl->PushCommandBuffer(id, entries); } -void GPU::ClearCdmaInstance() { - impl->ClearCdmaInstance(); +void GPU::ClearCdmaInstance(u32 id) { + impl->ClearCdmaInstance(id); } void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index c89a5d693..500411176 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -242,10 +242,10 @@ public: void PushGPUEntries(Tegra::CommandList&& entries); /// Push GPU command buffer entries to be processed - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); + void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance(); + void ClearCdmaInstance(u32 id); /// Swap buffers (render frame) void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); -- cgit v1.2.3 From 762b8ad448369cc770beae4d8368a6258b13709e Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Thu, 2 Dec 2021 14:20:43 -0500 Subject: general: Replace high_resolution_clock with steady_clock On some OSes, high_resolution_clock is an alias to system_clock and is not monotonic in nature. Replace this with steady_clock. 
--- src/video_core/shader_notify.cpp | 2 +- src/video_core/shader_notify.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index dc6995b46..bcaf5f575 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept { const int now_complete = num_complete.load(std::memory_order::relaxed); const int now_building = num_building.load(std::memory_order::relaxed); if (now_complete == now_building) { - const auto now = std::chrono::high_resolution_clock::now(); + const auto now = std::chrono::steady_clock::now(); if (completed && num_complete == num_when_completed) { if (now - complete_time > TIME_TO_STOP_REPORTING) { report_base = now_complete; diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index ad363bfb5..4d8d52071 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -28,6 +28,6 @@ private: bool completed{}; int num_when_completed{}; - std::chrono::high_resolution_clock::time_point complete_time; + std::chrono::steady_clock::time_point complete_time; }; } // namespace VideoCore -- cgit v1.2.3 From cd27f211c8ad67c73c831e57a4eb298f9693253f Mon Sep 17 00:00:00 2001 From: liushuyu Date: Sun, 28 Nov 2021 23:51:25 -0700 Subject: video_core/codecs: more robust ffmpeg hwdecoder selection logic --- src/video_core/command_classes/codecs/codec.cpp | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 02d309170..1949a8cf3 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,6 +23,14 @@ namespace Tegra { namespace { constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; constexpr 
AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; +constexpr std::array PREFERRED_GPU_DECODERS = {AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_DXVA2, +#elif linux + AV_HWDEVICE_TYPE_VDPAU, +#endif + // last resort for Linux Flatpak (w/ NVIDIA) + AV_HWDEVICE_TYPE_VULKAN}; void AVPacketDeleter(AVPacket* ptr) { av_packet_free(&ptr); @@ -61,6 +69,19 @@ Codec::~Codec() { av_buffer_unref(&av_gpu_decoder); } +// List all the currently available hwcontext in ffmpeg +static std::vector ListSupportedContexts() { + std::vector contexts{}; + enum AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; + do { + current_device_type = av_hwdevice_iterate_types(current_device_type); + // filter out VA-API since we will try that first if supported + if (current_device_type != AV_HWDEVICE_TYPE_VAAPI) + contexts.push_back(current_device_type); + } while (current_device_type != AV_HWDEVICE_TYPE_NONE); + return contexts; +} + #ifdef LIBVA_FOUND // List all the currently loaded Linux modules static std::vector ListLinuxKernelModules() { @@ -122,16 +143,12 @@ bool Codec::CreateGpuAvDevice() { av_dict_free(&hwdevice_options); #endif static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; - static constexpr std::array GPU_DECODER_TYPES{ -#ifdef linux - AV_HWDEVICE_TYPE_VDPAU, -#endif - AV_HWDEVICE_TYPE_CUDA, -#ifdef _WIN32 - AV_HWDEVICE_TYPE_D3D11VA, -#endif - }; - for (const auto& type : GPU_DECODER_TYPES) { + static const auto supported_contexts = ListSupportedContexts(); + for (const auto& type : PREFERRED_GPU_DECODERS) { + if (std::none_of(supported_contexts.begin(), supported_contexts.end(), + [&type](const auto& context) { return context == type; })) { + continue; + } const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); if (hwdevice_res < 0) { LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", -- cgit v1.2.3 From 20a46790d7059c7fa8efeb1c95e62a57d97e42e3 Mon Sep 
17 00:00:00 2001 From: liushuyu Date: Mon, 29 Nov 2021 16:47:24 -0700 Subject: video_core/codec: address comments --- src/video_core/command_classes/codecs/codec.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 1949a8cf3..2c0d8da64 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,14 +23,17 @@ namespace Tegra { namespace { constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; -constexpr std::array PREFERRED_GPU_DECODERS = {AV_HWDEVICE_TYPE_CUDA, +constexpr std::array PREFERRED_GPU_DECODERS = { + AV_HWDEVICE_TYPE_CUDA, #ifdef _WIN32 - AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_DXVA2, -#elif linux - AV_HWDEVICE_TYPE_VDPAU, + AV_HWDEVICE_TYPE_D3D11VA, + AV_HWDEVICE_TYPE_DXVA2, +#elif defined(__linux__) + AV_HWDEVICE_TYPE_VDPAU, #endif - // last resort for Linux Flatpak (w/ NVIDIA) - AV_HWDEVICE_TYPE_VULKAN}; + // last resort for Linux Flatpak (w/ NVIDIA) + AV_HWDEVICE_TYPE_VULKAN, +}; void AVPacketDeleter(AVPacket* ptr) { av_packet_free(&ptr); @@ -72,12 +75,13 @@ Codec::~Codec() { // List all the currently available hwcontext in ffmpeg static std::vector ListSupportedContexts() { std::vector contexts{}; - enum AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; + AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; do { current_device_type = av_hwdevice_iterate_types(current_device_type); // filter out VA-API since we will try that first if supported - if (current_device_type != AV_HWDEVICE_TYPE_VAAPI) + if (current_device_type != AV_HWDEVICE_TYPE_VAAPI) { contexts.push_back(current_device_type); + } } while (current_device_type != AV_HWDEVICE_TYPE_NONE); return contexts; } -- cgit v1.2.3 From a578df4c6bd06c622baddd77d4e456150a673121 Mon 
Sep 17 00:00:00 2001 From: liushuyu Date: Thu, 2 Dec 2021 21:27:41 -0700 Subject: video_core/codecs: more fixes for VAAPI detection ... * skip impersonated VAAPI implementations ("imposter detection") * place VAAPI priority below CUDA/NVDEC/CUVID --- src/video_core/command_classes/codecs/codec.cpp | 88 +++++++------------------ 1 file changed, 25 insertions(+), 63 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 2c0d8da64..2a532b883 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -17,6 +17,10 @@ extern "C" { #include +#ifdef LIBVA_FOUND +// for querying VAAPI driver information +#include +#endif } namespace Tegra { @@ -29,6 +33,7 @@ constexpr std::array PREFERRED_GPU_DECODERS = { AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_DXVA2, #elif defined(__linux__) + AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_VDPAU, #endif // last resort for Linux Flatpak (w/ NVIDIA) @@ -78,79 +83,18 @@ static std::vector ListSupportedContexts() { AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; do { current_device_type = av_hwdevice_iterate_types(current_device_type); - // filter out VA-API since we will try that first if supported - if (current_device_type != AV_HWDEVICE_TYPE_VAAPI) { - contexts.push_back(current_device_type); - } + contexts.push_back(current_device_type); } while (current_device_type != AV_HWDEVICE_TYPE_NONE); return contexts; } -#ifdef LIBVA_FOUND -// List all the currently loaded Linux modules -static std::vector ListLinuxKernelModules() { - using FILEPtr = std::unique_ptr; - auto module_listing = FILEPtr{fopen("/proc/modules", "rt"), std::fclose}; - std::vector modules{}; - if (!module_listing) { - LOG_WARNING(Service_NVDRV, "Could not open /proc/modules to collect available modules"); - return modules; - } - char* buffer = nullptr; - size_t buf_len = 0; - while (getline(&buffer,
&buf_len, module_listing.get()) != -1) { - // format for the module listing file (sysfs) - // - auto line = std::string(buffer); - // we are only interested in module names - auto name_pos = line.find_first_of(" "); - if (name_pos == std::string::npos) { - continue; - } - modules.push_back(line.erase(name_pos)); - } - free(buffer); - return modules; -} -#endif - bool Codec::CreateGpuAvDevice() { -#if defined(LIBVA_FOUND) - static constexpr std::array VAAPI_DRIVERS = { - "i915", - "iHD", - "amdgpu", - }; - AVDictionary* hwdevice_options = nullptr; - const auto loaded_modules = ListLinuxKernelModules(); - av_dict_set(&hwdevice_options, "connection_type", "drm", 0); - for (const auto& driver : VAAPI_DRIVERS) { - // first check if the target driver is loaded in the kernel - bool found = std::any_of(loaded_modules.begin(), loaded_modules.end(), - [&driver](const auto& module) { return module == driver; }); - if (!found) { - LOG_DEBUG(Service_NVDRV, "Kernel driver {} is not loaded, trying the next one", driver); - continue; - } - av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); - const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, - nullptr, hwdevice_options, 0); - if (hwdevice_error >= 0) { - LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); - av_dict_free(&hwdevice_options); - av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; - return true; - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); - av_dict_free(&hwdevice_options); -#endif static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; static const auto supported_contexts = ListSupportedContexts(); for (const auto& type : PREFERRED_GPU_DECODERS) { if (std::none_of(supported_contexts.begin(), supported_contexts.end(), [&type](const auto& context) { return context == type; })) { + LOG_DEBUG(Service_NVDRV, "{} explicitly 
unsupported", av_hwdevice_get_type_name(type)); continue; } const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); @@ -159,6 +103,24 @@ bool Codec::CreateGpuAvDevice() { av_hwdevice_get_type_name(type), hwdevice_res); continue; } +#ifdef LIBVA_FOUND + if (type == AV_HWDEVICE_TYPE_VAAPI) { + // we need to determine if this is an impersonated VAAPI driver + AVHWDeviceContext* hwctx = + static_cast(static_cast(av_gpu_decoder->data)); + AVVAAPIDeviceContext* vactx = static_cast(hwctx->hwctx); + const char* vendor_name = vaQueryVendorString(vactx->display); + if (strstr(vendor_name, "VDPAU backend")) { + // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them + LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver"); + continue; + } else { + // according to some user testing, certain vaapi driver (Intel?) could be buggy + // so let's log the driver name which may help the developers/supporters + LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name); + } + } +#endif for (int i = 0;; i++) { const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); if (!config) { -- cgit v1.2.3 From e7f10de11a935423b233cda0b156e5e8e786bd1f Mon Sep 17 00:00:00 2001 From: liushuyu Date: Thu, 2 Dec 2021 22:35:30 -0700 Subject: video_core/cmake: link against libva explicitly ... ... 
to fix build on Flatpak (and self-builds) --- src/video_core/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 91a30fef7..6a6325e38 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory(host_shaders) if(LIBVA_FOUND) set_source_files_properties(command_classes/codecs/codec.cpp PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) + list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) endif() add_library(video_core STATIC -- cgit v1.2.3 From a5c212516cc4ca73807e03ab7c40e469ecabd061 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 4 Dec 2021 10:20:28 +0100 Subject: Texture Cache: Fix crashes on NVIDIA. --- src/video_core/texture_cache/texture_cache.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 565b99254..e195b1e98 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1122,7 +1122,7 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( break; } if (can_be_depth_blit) { - const ImageBase* const dst_image = src_id ? &slot_images[src_id] : nullptr; + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; DeduceBlitImages(dst_info, src_info, dst_image, src_image); if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { continue; @@ -1135,8 +1135,11 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); } } while (has_deleted_images); - if (GetFormatType(dst_info.format) != SurfaceType::ColorTexture) { - // Make sure the images are depth and/or stencil textures. + const ImageBase& src_image = slot_images[src_id]; + const ImageBase& dst_image = slot_images[dst_id]; + if (GetFormatType(dst_info.format) != GetFormatType(dst_image.info.format) || + GetFormatType(src_info.format) != GetFormatType(src_image.info.format)) { + // Make sure the images match the expected format. do { has_deleted_images = false; src_id = FindOrInsertImage(src_info, src_addr, RelaxedOptions{}); -- cgit v1.2.3 From 5462485cc3835941713b835bce3b671b15d210b7 Mon Sep 17 00:00:00 2001 From: Feng Chen Date: Fri, 3 Dec 2021 12:31:07 +0800 Subject: Address feedback --- src/video_core/gpu.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 27a47954d..8788f5148 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -333,8 +333,8 @@ struct GPU::Impl { return; } - if (cdma_pushers.find(id) == cdma_pushers.end()) { - cdma_pushers[id] = std::make_unique(gpu); + if (!cdma_pushers.contains(id)) { + cdma_pushers.insert_or_assign(id, std::make_unique(gpu)); } // SubmitCommandBuffer would make the nvdec operations async, this is not currently working @@ -345,8 +345,9 @@ struct GPU::Impl { /// Frees the CDMAPusher instance to free up resources void ClearCdmaInstance(u32 id) { - if (cdma_pushers.find(id) != cdma_pushers.end()) { - cdma_pushers.erase(id); + const auto iter = cdma_pushers.find(id); + if (iter != cdma_pushers.end()) { + cdma_pushers.erase(iter); } } -- cgit v1.2.3 From 47a724780fe1e24bbbd157b1cc821e2232e832d3 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 8 Dec 2021 10:55:11 -0500 Subject: renderer_vulkan: Add R16G16_UINT - Used by Immortals Fenyx Rising 
--- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 31adada56..e38cfbc6c 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -162,7 +162,7 @@ struct FormatTuple { {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT - {VK_FORMAT_UNDEFINED}, // R16G16_UINT + {VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 70c52aaac..7bf5b6578 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -130,6 +130,7 @@ std::unordered_map GetFormatProperties(vk::Physica VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_SNORM, -- cgit v1.2.3 From ae4869650a4056337e48d6b8c1ddf87240a2bfac Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Thu, 9 Dec 2021 13:53:53 -0500 Subject: maxwell_to_vk: Add ASTC_2D_8X5_UNORM - Used by Lego City Undercover --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index e38cfbc6c..68ab662d5 100644 --- 
a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -176,7 +176,7 @@ struct FormatTuple { {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM - {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM + {VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB -- cgit v1.2.3 From 159842649392a2328de5f258ce742c4361f51f60 Mon Sep 17 00:00:00 2001 From: Feng Chen Date: Fri, 10 Dec 2021 12:03:34 +0800 Subject: Fix blit image/view not compatible --- src/video_core/texture_cache/texture_cache.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e195b1e98..5aaeb16ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1137,8 +1137,13 @@ typename TextureCache

<P>::BlitImages TextureCache<P>

::GetBlitImages( } while (has_deleted_images); const ImageBase& src_image = slot_images[src_id]; const ImageBase& dst_image = slot_images[dst_id]; + const bool native_bgr = runtime.HasNativeBgr(); if (GetFormatType(dst_info.format) != GetFormatType(dst_image.info.format) || - GetFormatType(src_info.format) != GetFormatType(src_image.info.format)) { + GetFormatType(src_info.format) != GetFormatType(src_image.info.format) || + !VideoCore::Surface::IsViewCompatible(dst_info.format, dst_image.info.format, false, + native_bgr) || + !VideoCore::Surface::IsViewCompatible(src_info.format, src_image.info.format, false, + native_bgr)) { // Make sure the images match the expected format. do { has_deleted_images = false; -- cgit v1.2.3 From 14110230c70300b9553074c38828ae2e5f98b009 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 10 Dec 2021 22:44:24 -0500 Subject: maxwell_to_vk: Add ASTC_2D_5X4_UNORM --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 68ab662d5..751e4792b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -177,7 +177,7 @@ struct FormatTuple { {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM {VK_FORMAT_ASTC_8x5_UNORM_BLOCK}, // ASTC_2D_8X5_UNORM - {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM + {VK_FORMAT_ASTC_5x4_UNORM_BLOCK}, // ASTC_2D_5X4_UNORM {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB -- cgit v1.2.3 From a2d73eaa107bb5e3cd570e522fc69311468c2c89 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Sun, 12 Dec 2021 17:43:10 -0700 Subject: video_core/codecs: skip decoders that use hw frames ... 
... this would resolve some edge-cases where multiple devices are present and ffmpeg is unable to auto-supply the hw surfaces --- src/video_core/command_classes/codecs/codec.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 2a532b883..439c47209 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -130,6 +130,12 @@ bool Codec::CreateGpuAvDevice() { } if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { av_codec_ctx->pix_fmt = config->pix_fmt; + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) { + // skip zero-copy decoders, we don't currently support them + LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", + av_hwdevice_get_type_name(type), config->methods); + continue; + } LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); return true; } @@ -251,6 +257,9 @@ void Codec::Decode() { final_frame->format = PREFERRED_GPU_FMT; const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); + // null the hw frame context to prevent the buffer from being deleted + // and leaving a dangling reference in the av_codec_ctx + initial_frame->hw_frames_ctx = nullptr; } else { final_frame = std::move(initial_frame); } -- cgit v1.2.3 From dd72e4dce4641498bd7e73f09afd7d90961c435d Mon Sep 17 00:00:00 2001 From: liushuyu Date: Sun, 12 Dec 2021 18:28:52 -0700 Subject: CI: fix CI on Linux --- src/video_core/command_classes/codecs/codec.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 439c47209..868b82f9b 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ 
b/src/video_core/command_classes/codecs/codec.cpp @@ -257,9 +257,6 @@ void Codec::Decode() { final_frame->format = PREFERRED_GPU_FMT; const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); - // null the hw frame context to prevent the buffer from being deleted - // and leaving a dangling reference in the av_codec_ctx - initial_frame->hw_frames_ctx = nullptr; } else { final_frame = std::move(initial_frame); } -- cgit v1.2.3 From 2f32133ad5e13503c56bc5c910407a27cc23908b Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 15 Dec 2021 00:02:53 -0800 Subject: Revert "video_core/codecs: refactor ffmpeg searching and handling in cmake" --- src/video_core/command_classes/codecs/codec.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 868b82f9b..2a532b883 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -130,12 +130,6 @@ bool Codec::CreateGpuAvDevice() { } if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { av_codec_ctx->pix_fmt = config->pix_fmt; - if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) { - // skip zero-copy decoders, we don't currently support them - LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", - av_hwdevice_get_type_name(type), config->methods); - continue; - } LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); return true; } -- cgit v1.2.3