diff options
Diffstat (limited to 'src/video_core')
18 files changed, 128 insertions, 182 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7bfd57369..d350c9b36 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -570,13 +570,12 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am ForEachWrittenRange(*cpu_src_address, amount, mirror); // This subtraction in this order is important for overlapping copies. common_ranges.subtract(subtract_interval); - bool atleast_1_download = tmp_intervals.size() != 0; - for (const IntervalType add_interval : tmp_intervals) { + const bool has_new_downloads = tmp_intervals.size() != 0; + for (const IntervalType& add_interval : tmp_intervals) { common_ranges.add(add_interval); } - runtime.CopyBuffer(dest_buffer, src_buffer, copies); - if (atleast_1_download) { + if (has_new_downloads) { dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); } std::vector<u8> tmp_buffer(amount); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c7ec1eac9..67388d980 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -82,41 +82,41 @@ void MaxwellDMA::Launch() { } void MaxwellDMA::CopyPitchToPitch() { - // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D - // buffer of length `line_length_in`. - // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). - auto& accelerate = rasterizer->AccessAccelerateDMA(); - if (!regs.launch_dma.multi_line_enable) { - const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 && - regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; - // TODO: allow multisized components. 
- if (is_buffer_clear) { - ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); - std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); - memory_manager.WriteBlockUnsafe(regs.offset_out, - reinterpret_cast<u8*>(tmp_buffer.data()), - regs.line_length_in * sizeof(u32)); - return; - } - UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); - if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - std::vector<u8> tmp_buffer(regs.line_length_in); - memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); - memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); + // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions + // (line_length_in, line_count). + // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in. + const bool remap_enabled = regs.launch_dma.remap_enable != 0; + if (regs.launch_dma.multi_line_enable) { + UNIMPLEMENTED_IF(remap_enabled); + + // Perform a line-by-line copy. + // We're going to take a subrect of size (line_length_in, line_count) from the source + // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock + // does that for us. + for (u32 line = 0; line < regs.line_count; ++line) { + const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in; + const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out; + memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); } return; } - - UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); - - // Perform a line-by-line copy. - // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. - // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. 
- for (u32 line = 0; line < regs.line_count; ++line) { - const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in; - const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out; - memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); + // TODO: allow multisized components. + auto& accelerate = rasterizer->AccessAccelerateDMA(); + const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; + const bool is_buffer_clear = remap_enabled && is_const_a_dst; + if (is_buffer_clear) { + ASSERT(regs.remap_const.component_size_minus_one == 3); + accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); + std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); + memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()), + regs.line_length_in * sizeof(u32)); + return; + } + UNIMPLEMENTED_IF(remap_enabled); + if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { + std::vector<u8> tmp_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); + memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); } } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 9e457ae16..a04514425 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -175,7 +175,7 @@ public: static_assert(sizeof(LaunchDMA) == 4); struct RemapConst { - enum Swizzle : u32 { + enum class Swizzle : u32 { SRC_X = 0, SRC_Y = 1, SRC_Z = 2, diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c9cff7450..20d748c12 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -6,7 +6,6 @@ set(SHADER_FILES convert_float_to_depth.frag 
full_screen_triangle.vert opengl_copy_bc4.comp - opengl_copy_bgra.comp opengl_present.frag opengl_present.vert pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/opengl_copy_bgra.comp b/src/video_core/host_shaders/opengl_copy_bgra.comp deleted file mode 100644 index 2571a4abf..000000000 --- a/src/video_core/host_shaders/opengl_copy_bgra.comp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 430 core - -layout (local_size_x = 4, local_size_y = 4) in; - -layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input; -layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output; - -void main() { - vec4 color = imageLoad(bgr_input, ivec3(gl_GlobalInvocationID)); - imageStore(bgr_output, ivec3(gl_GlobalInvocationID), color.bgra); -} diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 07a995f7d..187a28e4d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -147,8 +147,7 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(size / sizeof(u32)), GL_RED, GL_UNSIGNED_INT, - &value); + static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value); } void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b0aee6cc1..54dae2c41 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -461,7 +461,7 @@ bool 
TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { return false; } - if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { + if (IsPixelFormatBGR(dst.info.format) != IsPixelFormatBGR(src.info.format)) { return false; } return true; @@ -473,7 +473,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, ASSERT(src.info.type == ImageType::e3D); util_shaders.CopyBC4(dst, src, copies); } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { - util_shaders.CopyBGR(dst, src, copies); + bgr_copy_pass.CopyBGR(dst, src, copies); } else { UNREACHABLE(); } @@ -1112,4 +1112,37 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM framebuffer.handle = handle; } +void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies) { + static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; + const u32 requested_pbo_size = + std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes); + + if (bgr_pbo_size < requested_pbo_size) { + bgr_pbo.Create(); + bgr_pbo_size = requested_pbo_size; + glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); + } + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_offset == zero_offset); + ASSERT(copy.dst_offset == zero_offset); + + // Copy from source to PBO + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); + glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.src_subresource.num_layers, src_image.GlFormat(), + src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr); + + // Copy from PBO to destination in desired GL format + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 
copy.extent.width); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr_pbo.handle); + glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.dst_subresource.num_layers, dst_image.GlFormat(), + dst_image.GlType(), nullptr); + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 4a4f6301c..c498a8a8f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -47,6 +47,19 @@ struct FormatProperties { bool is_compressed; }; +class BGRCopyPass { +public: + BGRCopyPass() = default; + ~BGRCopyPass() = default; + + void CopyBGR(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies); + +private: + OGLBuffer bgr_pbo; + size_t bgr_pbo_size{}; +}; + class TextureCacheRuntime { friend Framebuffer; friend Image; @@ -118,6 +131,7 @@ private: const Device& device; StateTracker& state_tracker; UtilShaders util_shaders; + BGRCopyPass bgr_copy_pass; std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; bool has_broken_texture_view_formats = false; @@ -162,6 +176,14 @@ public: return texture.handle; } + GLuint GlFormat() const noexcept { + return gl_format; + } + + GLuint GlType() const noexcept { + return gl_type; + } + private: void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 672f94bfc..39158aa3e 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -52,7 +52,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, 
GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, // B8G8R8A8_UNORM {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT @@ -81,7 +81,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, // B8G8R8A8_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 333f35a1c..897c380b3 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -14,7 +14,6 @@ #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" #include "video_core/host_shaders/opengl_copy_bc4_comp.h" -#include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -44,11 +43,6 @@ namespace { OGLProgram MakeProgram(std::string_view source) { return CreateProgram(source, GL_COMPUTE_SHADER); } - -size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { - return static_cast<size_t>(copy.extent.width * copy.extent.height * - copy.src_subresource.num_layers); -} } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) @@ -56,7 +50,6 @@ UtilShaders::UtilShaders(ProgramManager& 
program_manager_) block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), - copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); @@ -255,43 +248,6 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im program_manager.RestoreGuestCompute(); } -void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, - std::span<const VideoCommon::ImageCopy> copies) { - static constexpr GLuint BINDING_INPUT_IMAGE = 0; - static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; - static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; - const u32 bytes_per_block = BytesPerBlock(dst_image.info.format); - switch (bytes_per_block) { - case 2: - // BGR565 copy - for (const ImageCopy& copy : copies) { - ASSERT(copy.src_offset == zero_offset); - ASSERT(copy.dst_offset == zero_offset); - bgr_copy_pass.Execute(dst_image, src_image, copy); - } - break; - case 4: { - // BGRA8 copy - program_manager.BindComputeProgram(copy_bgra_program.handle); - constexpr GLenum FORMAT = GL_RGBA8; - for (const ImageCopy& copy : copies) { - ASSERT(copy.src_offset == zero_offset); - ASSERT(copy.dst_offset == zero_offset); - glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), - copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT); - glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), - copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT); - glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); - } - program_manager.RestoreGuestCompute(); - break; - } - default: - UNREACHABLE(); - break; - } -} - GLenum StoreFormat(u32 bytes_per_block) { switch (bytes_per_block) { case 1: @@ -309,36 +265,4 @@ 
GLenum StoreFormat(u32 bytes_per_block) { return GL_R8UI; } -void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image, - const ImageCopy& copy) { - if (CopyBufferCreationNeeded(copy)) { - CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565); - } - // Copy from source to PBO - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle); - glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, - static_cast<GLsizei>(bgr16_pbo_size), nullptr); - - // Copy from PBO to destination in reverse order - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle); - glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, - nullptr); -} - -bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) { - return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16); -} - -void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) { - bgr16_pbo.Create(); - bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16); - glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index ef881e35f..5de95ea7a 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -19,22 +19,6 @@ class ProgramManager; struct ImageBufferMap; -class Bgr565CopyPass { -public: - Bgr565CopyPass() = default; - ~Bgr565CopyPass() = default; - - void Execute(const Image& dst_image, const Image& src_image, - const VideoCommon::ImageCopy& copy); - -private: - 
[[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy); - void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format); - - OGLBuffer bgr16_pbo; - size_t bgr16_pbo_size{}; -}; - class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); @@ -55,9 +39,6 @@ public: void CopyBC4(Image& dst_image, Image& src_image, std::span<const VideoCommon::ImageCopy> copies); - void CopyBGR(Image& dst_image, Image& src_image, - std::span<const VideoCommon::ImageCopy> copies); - private: ProgramManager& program_manager; @@ -67,10 +48,7 @@ private: OGLProgram block_linear_unswizzle_2d_program; OGLProgram block_linear_unswizzle_3d_program; OGLProgram pitch_unswizzle_program; - OGLProgram copy_bgra_program; OGLProgram copy_bc4_program; - - Bgr565CopyPass bgr_copy_pass; }; GLenum StoreFormat(u32 bytes_per_block); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index adb6b7a3b..74822814d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr<Core::Frontend::GraphicsContext> context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7c0f91007..11cd41ad7 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -507,8 +507,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { vertex_attributes.push_back({ .location = static_cast<u32>(index), .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? 
VK_FORMAT_R32_SINT + : VK_FORMAT_R32_UINT, .offset = 0, }); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bd22e4e83..85fc1712f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -212,7 +212,6 @@ private: vk::CommandBuffer current_cmdbuf; std::unique_ptr<CommandChunk> chunk; - std::jthread worker_thread; State state; @@ -226,6 +225,7 @@ private: std::mutex work_mutex; std::condition_variable_any work_cv; std::condition_variable wait_cv; + std::jthread worker_thread; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ff979a7ac..3b87640b5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -127,7 +127,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; if (info.type == ImageType::e2D && info.resources.layers >= 6 && - info.size.width == info.size.height) { + info.size.width == info.size.height && !device.HasBrokenCubeImageCompability()) { flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; } if (info.type == ImageType::e3D) { diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 74cd3c9d8..50df06409 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -31,8 +31,8 @@ struct SlotId { }; template <class T> -requires std::is_nothrow_move_assignable_v<T>&& - std::is_nothrow_move_constructible_v<T> class SlotVector { +requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T> +class SlotVector { public: class Iterator { friend SlotVector<T>; diff --git 
a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index c2ec9f76a..6388ed2eb 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -588,22 +588,27 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR ext_extended_dynamic_state = false; } } - sets_per_pool = 64; - if (driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE) { + + const bool is_amd = + driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; + if (is_amd) { // AMD drivers need a higher amount of Sets per Pool in certain circunstances like in XC2. sets_per_pool = 96; - } - - const bool is_amd = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || - driver_id == VK_DRIVER_ID_MESA_RADV || - driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; - if (ext_sampler_filter_minmax && is_amd) { - // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. + // Disable VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT on AMD GCN4 and lower as it is broken. if (!is_float16_supported) { LOG_WARNING( Render_Vulkan, - "Blacklisting AMD GCN4 and lower for VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME"); + "AMD GCN4 and earlier do not properly support VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); + has_broken_cube_compatibility = true; + } + } + const bool is_amd_or_radv = is_amd || driver_id == VK_DRIVER_ID_MESA_RADV; + if (ext_sampler_filter_minmax && is_amd_or_radv) { + // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. 
+ if (!is_float16_supported) { + LOG_WARNING(Render_Vulkan, + "Blacklisting AMD GCN4 and earlier for VK_EXT_sampler_filter_minmax"); ext_sampler_filter_minmax = false; } } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index bc180a32a..d9e74f1aa 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -309,6 +309,11 @@ public: return has_renderdoc || has_nsight_graphics; } + /// Returns true when the device does not properly support cube compatibility. + bool HasBrokenCubeImageCompability() const { + return has_broken_cube_compatibility; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return vendor_name; @@ -417,6 +422,7 @@ private: bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached |