diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/macro/macro_hle.cpp | 7 | ||||
-rw-r--r-- | src/video_core/rasterizer_interface.h | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 11 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 11 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.cpp | 17 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.h | 27 | ||||
-rw-r--r-- | src/video_core/texture_cache/decode_bc.cpp | 50 | ||||
-rw-r--r-- | src/video_core/texture_cache/decode_bc.h | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 16 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 16 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_wrapper.cpp | 4 |
13 files changed, 136 insertions, 44 deletions
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 046c8085e..46e853e04 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -327,12 +327,13 @@ public: explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { + const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); + auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0xFFFFU); - if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { + if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { Fallback(parameters); return; } - auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_byte_count = true; params.is_indexed = false; @@ -503,6 +504,8 @@ public: maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true); + + maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); } }; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index af1469147..49224ca85 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -173,5 +173,13 @@ public: virtual void BindChannel(Tegra::Control::ChannelState& channel) {} virtual void ReleaseChannel(s32 channel_id) {} + + /// Register the address as a Transform Feedback Object + virtual void RegisterTransformFeedback(GPUVAddr tfb_object_addr) {} + + /// Returns true when the rasterizer has Draw Transform Feedback capabilities + virtual bool HasDrawTransformFeedback() { + return false; + } }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 
b787b6994..517ac14dd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -376,4 +376,15 @@ void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, P *image_handles++ = buffer.View(offset, size, format); } +void BufferCacheRuntime::BindTransformFeedbackObject(GPUVAddr tfb_object_addr) { + OGLTransformFeedback& tfb_object = tfb_objects[tfb_object_addr]; + tfb_object.Create(); + glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, tfb_object.handle); +} + +GLuint BufferCacheRuntime::GetTransformFeedbackObject(GPUVAddr tfb_object_addr) { + ASSERT(tfb_objects.contains(tfb_object_addr)); + return tfb_objects[tfb_object_addr].handle; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 1e8708f59..2c18de166 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -5,6 +5,7 @@ #include <array> #include <span> +#include <unordered_map> #include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache_base.h" @@ -121,6 +122,9 @@ public: void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + void BindTransformFeedbackObject(GPUVAddr tfb_object_addr); + GLuint GetTransformFeedbackObject(GPUVAddr tfb_object_addr); + u64 GetDeviceMemoryUsage() const; void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { @@ -233,6 +237,7 @@ private: u32 index_buffer_offset = 0; u64 device_access_memory; + std::unordered_map<GPUVAddr, OGLTransformFeedback> tfb_objects; }; struct BufferCacheParams { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 339950d2e..7a5fad735 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -309,6 +309,13 @@ void 
RasterizerOpenGL::DrawIndirect() { const auto& params = maxwell3d->draw_manager->GetIndirectParams(); buffer_cache.SetDrawIndirect(&params); PrepareDraw(params.is_indexed, [this, &params](GLenum primitive_mode) { + if (params.is_byte_count) { + const GPUVAddr tfb_object_base_addr = params.indirect_start_address - 4U; + const GLuint tfb_object = + buffer_cache_runtime.GetTransformFeedbackObject(tfb_object_base_addr); + glDrawTransformFeedback(primitive_mode, tfb_object); + return; + } const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer(); const GLvoid* const gl_offset = reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(offset)); @@ -1371,6 +1378,10 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } +void RasterizerOpenGL::RegisterTransformFeedback(GPUVAddr tfb_object_addr) { + buffer_cache_runtime.BindTransformFeedbackObject(tfb_object_addr); +} + AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_) : buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b79d7a70c..ce3460938 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -139,6 +139,12 @@ public: void ReleaseChannel(s32 channel_id) override; + void RegisterTransformFeedback(GPUVAddr tfb_object_addr) override; + + bool HasDrawTransformFeedback() override { + return true; + } + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index eae8fd110..1d2c9b70a 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -207,4 +207,21 @@ void OGLQuery::Release() { handle = 0; } +void 
OGLTransformFeedback::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glCreateTransformFeedbacks(1, &handle); +} + +void OGLTransformFeedback::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteTransformFeedbacks(1, &handle); + handle = 0; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 77362acd2..6ca8227bd 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -323,4 +323,31 @@ public: GLuint handle = 0; }; +class OGLTransformFeedback final { +public: + YUZU_NON_COPYABLE(OGLTransformFeedback); + + OGLTransformFeedback() = default; + + OGLTransformFeedback(OGLTransformFeedback&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLTransformFeedback() { + Release(); + } + + OGLTransformFeedback& operator=(OGLTransformFeedback&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + } // namespace OpenGL diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp index 3e26474a3..a018c6df4 100644 --- a/src/video_core/texture_cache/decode_bc.cpp +++ b/src/video_core/texture_cache/decode_bc.cpp @@ -60,66 +60,72 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { } template <auto decompress, PixelFormat pixel_format> -void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, bool is_signed = false) { const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); - const u32 block_width = 
std::min(extent.width, BLOCK_SIZE); - const u32 block_height = std::min(extent.height, BLOCK_SIZE); - const u32 pitch = extent.width * out_bpp; + const u32 block_size = BlockSize(pixel_format); + const u32 width = copy.image_extent.width; + const u32 height = copy.image_extent.height * copy.image_subresource.num_layers; + const u32 depth = copy.image_extent.depth; + const u32 block_width = std::min(width, BLOCK_SIZE); + const u32 block_height = std::min(height, BLOCK_SIZE); + const u32 pitch = width * out_bpp; size_t input_offset = 0; size_t output_offset = 0; - for (u32 slice = 0; slice < extent.depth; ++slice) { - for (u32 y = 0; y < extent.height; y += block_height) { - size_t row_offset = 0; - for (u32 x = 0; x < extent.width; - x += block_width, row_offset += block_width * out_bpp) { - const u8* src = input.data() + input_offset; - u8* const dst = output.data() + output_offset + row_offset; + for (u32 slice = 0; slice < depth; ++slice) { + for (u32 y = 0; y < height; y += block_height) { + size_t src_offset = input_offset; + size_t dst_offset = output_offset; + for (u32 x = 0; x < width; x += block_width) { + const u8* src = input.data() + src_offset; + u8* const dst = output.data() + dst_offset; if constexpr (IsSigned(pixel_format)) { - decompress(src, dst, x, y, extent.width, extent.height, is_signed); + decompress(src, dst, x, y, width, height, is_signed); } else { - decompress(src, dst, x, y, extent.width, extent.height); + decompress(src, dst, x, y, width, height); } - input_offset += BlockSize(pixel_format); + src_offset += block_size; + dst_offset += block_width * out_bpp; } + input_offset += copy.buffer_row_length * block_size / block_width; output_offset += block_height * pitch; } } } -void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, VideoCore::Surface::PixelFormat pixel_format) { switch (pixel_format) { case 
PixelFormat::BC1_RGBA_UNORM: case PixelFormat::BC1_RGBA_SRGB: - DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy); break; case PixelFormat::BC2_UNORM: case PixelFormat::BC2_SRGB: - DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy); break; case PixelFormat::BC3_UNORM: case PixelFormat::BC3_SRGB: - DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy); break; case PixelFormat::BC4_SNORM: case PixelFormat::BC4_UNORM: DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( - input, output, extent, pixel_format == PixelFormat::BC4_SNORM); + input, output, copy, pixel_format == PixelFormat::BC4_SNORM); break; case PixelFormat::BC5_SNORM: case PixelFormat::BC5_UNORM: DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( - input, output, extent, pixel_format == PixelFormat::BC5_SNORM); + input, output, copy, pixel_format == PixelFormat::BC5_SNORM); break; case PixelFormat::BC6H_SFLOAT: case PixelFormat::BC6H_UFLOAT: DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( - input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); + input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT); break; case PixelFormat::BC7_SRGB: case PixelFormat::BC7_UNORM: - DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy); break; default: LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h index 41d1ec0a3..4e3b9b8ac 100644 --- a/src/video_core/texture_cache/decode_bc.h +++ 
b/src/video_core/texture_cache/decode_bc.h @@ -13,7 +13,7 @@ namespace VideoCommon { [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); -void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, VideoCore::Surface::PixelFormat pixel_format); } // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 15596c925..fcf70068e 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -837,6 +837,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory std::span<u8> output) { const size_t guest_size_bytes = input.size_bytes(); const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const Extent2D tile_size = DefaultBlockSize(info.format); const Extent3D size = info.size; if (info.type == ImageType::Linear) { @@ -847,7 +848,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory return {{ .buffer_offset = 0, .buffer_size = guest_size_bytes, - .buffer_row_length = info.pitch >> bpp_log2, + .buffer_row_length = info.pitch * tile_size.width >> bpp_log2, .buffer_image_height = size.height, .image_subresource = { @@ -862,7 +863,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory const LevelInfo level_info = MakeLevelInfo(info); const s32 num_layers = info.resources.layers; const s32 num_levels = info.resources.levels; - const Extent2D tile_size = DefaultBlockSize(info.format); const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels); @@ -926,8 +926,6 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 const auto input_offset = 
input.subspan(copy.buffer_offset); copy.buffer_offset = output_offset; - copy.buffer_row_length = mip_size.width; - copy.buffer_image_height = mip_size.height; const auto recompression_setting = Settings::values.astc_recompression.GetValue(); const bool astc = IsPixelFormatASTC(info.format); @@ -972,16 +970,14 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 bpp_div; output_offset += static_cast<u32>(copy.buffer_size); } else { - const Extent3D image_extent{ - .width = copy.image_extent.width, - .height = copy.image_extent.height * copy.image_subresource.num_layers, - .depth = copy.image_extent.depth, - }; - DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); + DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format); output_offset += copy.image_extent.width * copy.image_extent.height * copy.image_subresource.num_layers * ConvertedBytesPerBlock(info.format); } + + copy.buffer_row_length = mip_size.width; + copy.buffer_image_height = mip_size.height; } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index a6fbca69e..727bbd98d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -755,10 +755,10 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags // The wanted format is not supported by hardware, search for alternatives const VkFormat* alternatives = GetFormatAlternatives(wanted_format); if (alternatives == nullptr) { - ASSERT_MSG(false, - "Format={} with usage={} and type={} has no defined alternatives and host " - "hardware does not support it", - wanted_format, wanted_usage, format_type); + LOG_ERROR(Render_Vulkan, + "Format={} with usage={} and type={} has no defined alternatives and host " + "hardware does not support it", + wanted_format, wanted_usage, format_type); return wanted_format; } @@ -774,10 +774,10 @@ 
VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags } // No alternatives found, panic - ASSERT_MSG(false, - "Format={} with usage={} and type={} is not supported by the host hardware and " - "doesn't support any of the alternatives", - wanted_format, wanted_usage, format_type); + LOG_ERROR(Render_Vulkan, + "Format={} with usage={} and type={} is not supported by the host hardware and " + "doesn't support any of the alternatives", + wanted_format, wanted_usage, format_type); return wanted_format; } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2f78b8af0..074aed964 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -246,7 +246,9 @@ void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjec .objectHandle = reinterpret_cast<u64>(handle), .pObjectName = name, }; - Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); + if (dld->vkSetDebugUtilsObjectNameEXT) { + Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); + } } } // Anonymous namespace |