diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 5 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.h | 55 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 26 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 16 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 60 |
6 files changed, 104 insertions, 60 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index da8eab7ee..279f0daa1 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -109,10 +109,11 @@ void MaxwellDMA::Launch() { const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); + accelerate.BufferClear(regs.offset_out, regs.line_length_in, + regs.remap_const.remap_consta_value); read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); - std::ranges::fill(span, regs.remap_consta_value); + std::ranges::fill(span, regs.remap_const.remap_consta_value); memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(read_buffer.data()), regs.line_length_in * sizeof(u32)); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 69e26cb32..1a43e24b6 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -214,14 +214,15 @@ public: NO_WRITE = 6, }; - PackedGPUVAddr address; + u32 remap_consta_value; + u32 remap_constb_value; union { + BitField<0, 12, u32> dst_components_raw; BitField<0, 3, Swizzle> dst_x; BitField<4, 3, Swizzle> dst_y; BitField<8, 3, Swizzle> dst_z; BitField<12, 3, Swizzle> dst_w; - BitField<0, 12, u32> dst_components_raw; BitField<16, 2, u32> component_size_minus_one; BitField<20, 2, u32> num_src_components_minus_one; BitField<24, 2, u32> num_dst_components_minus_one; @@ -274,55 +275,57 @@ private: struct Regs { union { struct { - u32 reserved[0x40]; + INSERT_PADDING_BYTES_NOINIT(0x100); u32 nop; - u32 reserved01[0xf]; + INSERT_PADDING_BYTES_NOINIT(0x3C); u32 pm_trigger; - u32 reserved02[0x3f]; + INSERT_PADDING_BYTES_NOINIT(0xFC); Semaphore semaphore; - u32 reserved03[0x2]; + INSERT_PADDING_BYTES_NOINIT(0x8); RenderEnable render_enable; PhysMode src_phys_mode; PhysMode dst_phys_mode; - u32 reserved04[0x26]; + INSERT_PADDING_BYTES_NOINIT(0x98); LaunchDMA launch_dma; - u32 reserved05[0x3f]; + INSERT_PADDING_BYTES_NOINIT(0xFC); PackedGPUVAddr offset_in; PackedGPUVAddr offset_out; s32 pitch_in; s32 pitch_out; u32 line_length_in; u32 line_count; - u32 reserved06[0xb6]; - u32 remap_consta_value; - u32 remap_constb_value; + INSERT_PADDING_BYTES_NOINIT(0x2E0); RemapConst remap_const; DMA::Parameters dst_params; - u32 reserved07[0x1]; + INSERT_PADDING_BYTES_NOINIT(0x4); DMA::Parameters src_params; - u32 reserved08[0x275]; + INSERT_PADDING_BYTES_NOINIT(0x9D4); u32 pm_trigger_end; - u32 reserved09[0x3ba]; + INSERT_PADDING_BYTES_NOINIT(0xEE8); }; std::array<u32, NUM_REGS> reg_array; }; } regs{}; + static_assert(sizeof(Regs) == NUM_REGS * 4); #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \ + static_assert(offsetof(MaxwellDMA::Regs, field_name) == position, \ "Field " #field_name " has invalid position") - ASSERT_REG_POSITION(launch_dma, 0xC0); - ASSERT_REG_POSITION(offset_in, 0x100); - ASSERT_REG_POSITION(offset_out, 0x102); - ASSERT_REG_POSITION(pitch_in, 0x104); - ASSERT_REG_POSITION(pitch_out, 0x105); - ASSERT_REG_POSITION(line_length_in, 0x106); - ASSERT_REG_POSITION(line_count, 0x107); - ASSERT_REG_POSITION(remap_const, 0x1C0); - ASSERT_REG_POSITION(dst_params, 0x1C3); - ASSERT_REG_POSITION(src_params, 0x1CA); - + ASSERT_REG_POSITION(semaphore, 0x240); + ASSERT_REG_POSITION(render_enable, 0x254); + ASSERT_REG_POSITION(src_phys_mode, 0x260); + ASSERT_REG_POSITION(launch_dma, 0x300); + ASSERT_REG_POSITION(offset_in, 0x400); + ASSERT_REG_POSITION(offset_out, 0x408); + ASSERT_REG_POSITION(pitch_in, 0x410); + ASSERT_REG_POSITION(pitch_out, 0x414); + ASSERT_REG_POSITION(line_length_in, 0x418); + ASSERT_REG_POSITION(line_count, 0x41C); + ASSERT_REG_POSITION(remap_const, 0x700); + ASSERT_REG_POSITION(dst_params, 0x70C); + ASSERT_REG_POSITION(src_params, 0x728); + ASSERT_REG_POSITION(pm_trigger_end, 0x1114); #undef ASSERT_REG_POSITION }; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 35bf80ea3..208e88533 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -185,7 +185,7 @@ struct FormatTuple { {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB - {VK_FORMAT_R4G4B4A4_UNORM_PACK16}, // A4B4G4R4_UNORM + {VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT}, // A4B4G4R4_UNORM {VK_FORMAT_R4G4_UNORM_PACK8}, // G4R4_UNORM {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d935dd43e..1f9e7acaa 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -590,7 +590,7 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im } void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4>& swizzle, - bool emulate_bgr565) { + bool emulate_bgr565, bool emulate_a4b4g4r4) { switch (format) { case PixelFormat::A1B5G5R5_UNORM: std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed); @@ -606,6 +606,11 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4 case PixelFormat::G4R4_UNORM: std::ranges::transform(swizzle, swizzle.begin(), SwapGreenRed); break; + case PixelFormat::A4B4G4R4_UNORM: + if (emulate_a4b4g4r4) { + std::ranges::reverse(swizzle); + } + break; default: break; } @@ -1034,15 +1039,27 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst dst_region, src_region, filter, operation); return; } + ASSERT(src.format == dst.format); if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - if (!device.IsBlitDepthStencilSupported()) { + const auto format = src.format; + const auto can_blit_depth_stencil = [this, format] { + switch (format) { + case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT: + case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM: + return device.IsBlitDepth24Stencil8Supported(); + case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT: + return device.IsBlitDepth32Stencil8Supported(); + default: + UNREACHABLE(); + } + }(); + if (!can_blit_depth_stencil) { UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), dst_region, src_region, filter, operation); return; } } - ASSERT(src.format == dst.format); ASSERT(!(is_dst_msaa && !is_src_msaa)); ASSERT(operation == Fermi2D::Operation::SrcCopy); @@ -1639,7 +1656,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }; if (!info.IsRenderTarget()) { swizzle = info.Swizzle(); - TryTransformSwizzleIfNeeded(format, swizzle, device->MustEmulateBGR565()); + TryTransformSwizzleIfNeeded(format, swizzle, device->MustEmulateBGR565(), + !device->IsExt4444FormatsSupported()); if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 617417040..18185610f 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -76,6 +76,11 @@ constexpr std::array VK_FORMAT_R32G32B32_SFLOAT{ VK_FORMAT_UNDEFINED, }; +constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_UNDEFINED, +}; + } // namespace Alternatives enum class NvidiaArchitecture { @@ -110,6 +115,8 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { return Alternatives::R8G8B8_SSCALED.data(); case VK_FORMAT_R32G32B32_SFLOAT: return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data(); + case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT: + return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data(); default: return nullptr; } @@ -238,6 +245,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_R32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, VK_FORMAT_R4G4_UNORM_PACK8, VK_FORMAT_R5G5B5A1_UNORM_PACK16, VK_FORMAT_R5G6B5_UNORM_PACK16, @@ -420,7 +428,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR first_next = &diagnostics_nv; } - is_blit_depth_stencil_supported = TestDepthStencilBlits(); + is_blit_depth24_stencil8_supported = TestDepthStencilBlits(VK_FORMAT_D24_UNORM_S8_UINT); + is_blit_depth32_stencil8_supported = TestDepthStencilBlits(VK_FORMAT_D32_SFLOAT_S8_UINT); is_optimal_astc_supported = ComputeIsOptimalAstcSupported(); is_warp_potentially_bigger = !extensions.subgroup_size_control || properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize; @@ -774,14 +783,13 @@ bool Device::ComputeIsOptimalAstcSupported() const { return true; } -bool Device::TestDepthStencilBlits() const { +bool Device::TestDepthStencilBlits(VkFormat format) const { static constexpr VkFormatFeatureFlags required_features = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; const auto test_features = [](VkFormatProperties props) { return (props.optimalTilingFeatures & required_features) == required_features; }; - return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && - test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); + return test_features(format_properties.at(format)); } bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 488fdd313..8c5355a28 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -45,6 +45,7 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ + FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \ FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \ @@ -97,6 +98,7 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \ @@ -144,6 +146,7 @@ VK_DEFINE_HANDLE(VmaAllocator) #define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \ FEATURE_NAME(custom_border_color, customBorderColors) \ FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \ + FEATURE_NAME(format_a4b4g4r4, formatA4B4G4R4) \ FEATURE_NAME(index_type_uint8, indexTypeUint8) \ FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \ FEATURE_NAME(provoking_vertex, provokingVertexLast) \ @@ -359,9 +362,14 @@ public: return features.features.depthBounds; } - /// Returns true when blitting from and to depth stencil images is supported. - bool IsBlitDepthStencilSupported() const { - return is_blit_depth_stencil_supported; + /// Returns true when blitting from and to D24S8 images is supported. + bool IsBlitDepth24Stencil8Supported() const { + return is_blit_depth24_stencil8_supported; + } + + /// Returns true when blitting from and to D32S8 images is supported. + bool IsBlitDepth32Stencil8Supported() const { + return is_blit_depth32_stencil8_supported; } /// Returns true if the device supports VK_NV_viewport_swizzle. @@ -488,6 +496,11 @@ public: return extensions.extended_dynamic_state3; } + /// Returns true if the device supports VK_EXT_4444_formats. + bool IsExt4444FormatsSupported() const { + return features.format_a4b4g4r4.formatA4B4G4R4; + } + /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3BlendingSupported() const { return dynamic_state3_blending; @@ -666,7 +679,7 @@ private: bool ComputeIsOptimalAstcSupported() const; /// Returns true if the device natively supports blitting depth stencil images. - bool TestDepthStencilBlits() const; + bool TestDepthStencilBlits(VkFormat format) const; private: VkInstance instance; ///< Vulkan instance. @@ -730,25 +743,26 @@ private: VkPhysicalDeviceProperties2 properties2{}; // Misc features - bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats. - bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. - bool is_integrated{}; ///< Is GPU an iGPU. - bool is_virtual{}; ///< Is GPU a virtual GPU. - bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. - bool has_broken_compute{}; ///< Compute shaders can cause crashes - bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - bool supports_d24_depth{}; ///< Supports D24 depth buffers. - bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. - bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation - bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. - bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. - bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. - bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. - u64 device_access_memory{}; ///< Total size of device local memory in bytes. - u32 sets_per_pool{}; ///< Sets per Description Pool + bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats. + bool is_blit_depth24_stencil8_supported{}; ///< Support for blitting from and to D24S8. + bool is_blit_depth32_stencil8_supported{}; ///< Support for blitting from and to D32S8. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + bool is_integrated{}; ///< Is GPU an iGPU. + bool is_virtual{}; ///< Is GPU a virtual GPU. + bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. + bool has_broken_compute{}; ///< Compute shaders can cause crashes + bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation + bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. + bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. + bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. + u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 sets_per_pool{}; ///< Sets per Description Pool // Telemetry parameters std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. |