diff options
author | Mai M <mathew1800@gmail.com> | 2021-06-23 14:03:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-23 14:03:01 +0200 |
commit | 17fff10e06e7935522a5a69705b9a750761aab79 (patch) | |
tree | 7e7b3ae9fedbc0fed85f6c5c58e92e8d047efd87 /src/video_core/texture_cache | |
parent | Merge pull request #6508 from ReinUsesLisp/bootmanager-stop-token (diff) | |
parent | Reaper: Set minimum cleaning limit on OGL. (diff) | |
download | yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar.gz yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar.bz2 yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar.lz yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar.xz yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar.zst yuzu-17fff10e06e7935522a5a69705b9a750761aab79.zip |
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- | src/video_core/texture_cache/image_base.cpp | 37 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_base.h | 12 | ||||
-rw-r--r-- | src/video_core/texture_cache/slot_vector.h | 70 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 151 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 2 |
5 files changed, 259 insertions, 13 deletions
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 9914926b3..ad69d32d1 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie image_view_ids.push_back(image_view_id); } +bool ImageBase::IsSafeDownload() const noexcept { + // Skip images that were not modified from the GPU + if (False(flags & ImageFlagBits::GpuModified)) { + return false; + } + // Skip images that .are. modified from the CPU + // We don't want to write sensitive data from the guest + if (True(flags & ImageFlagBits::CpuModified)) { + return false; + } + if (info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + return false; + } + return true; +} + +void ImageBase::CheckBadOverlapState() { + if (False(flags & ImageFlagBits::BadOverlap)) { + return; + } + if (!overlapping_images.empty()) { + return; + } + flags &= ~ImageFlagBits::BadOverlap; +} + +void ImageBase::CheckAliasState() { + if (False(flags & ImageFlagBits::Alias)) { + return; + } + if (!aliased_images.empty()) { + return; + } + flags &= ~ImageFlagBits::Alias; +} + void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; ASSERT(lhs.info.type == rhs.info.type); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index b7f3b7e43..e326cab71 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 { Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked + + // Garbage Collection Flags + BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 9, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -44,11 +50,16 @@ struct ImageBase { void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); + [[nodiscard]] bool IsSafeDownload() const noexcept; + [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { const VAddr overlap_end = overlap_cpu_addr + overlap_size; return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } + void CheckBadOverlapState(); + void CheckAliasState(); + ImageInfo info; u32 guest_size_bytes = 0; @@ -72,6 +83,7 @@ struct ImageBase { std::vector<SubresourceBase> slice_subresources; std::vector<AliasedImage> aliased_images; + std::vector<ImageId> overlapping_images; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index eae3be6ea..6180b8c0e 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <bit> #include <concepts> #include <numeric> #include <type_traits> @@ -32,6 +33,60 @@ template <class T> requires std::is_nothrow_move_assignable_v<T>&& std::is_nothrow_move_constructible_v<T> class SlotVector { public: + class Iterator { + friend SlotVector<T>; + + public: + constexpr Iterator() = default; + + Iterator& operator++() noexcept { + const u64* const bitset = slot_vector->stored_bitset.data(); + const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64; + if (id.index < size) { + do { + ++id.index; + } while (id.index < size && !IsValid(bitset)); + if (id.index == size) { + id.index = SlotId::INVALID_INDEX; + } + } + return *this; + } + + Iterator operator++(int) noexcept { + const Iterator copy{*this}; + ++*this; + return copy; + } + + bool operator==(const Iterator& other) const noexcept { + return id.index == other.id.index; + } + + bool operator!=(const Iterator& other) const noexcept { + return id.index != other.id.index; + } + + std::pair<SlotId, T*> operator*() const noexcept { + return {id, std::addressof((*slot_vector)[id])}; + } + + T* operator->() const noexcept { + return std::addressof((*slot_vector)[id]); + } + + private: + Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept + : slot_vector{slot_vector_}, id{id_} {} + + bool IsValid(const u64* bitset) const noexcept { + return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; + } + + SlotVector<T>* slot_vector; + SlotId id; + }; + ~SlotVector() noexcept { size_t index = 0; for (u64 bits : stored_bitset) { @@ -70,6 +125,20 @@ public: ResetStorageBit(id.index); } + [[nodiscard]] Iterator begin() noexcept { + const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); + if (it == stored_bitset.end()) { + return end(); + } + const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin())); + const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))}; + return Iterator(this, first_id); + } + + [[nodiscard]] Iterator end() noexcept { + return Iterator(this, SlotId{SlotId::INVALID_INDEX}); + } + private: struct NonTrivialDummy { NonTrivialDummy() noexcept {} @@ -140,7 +209,6 @@ private: Entry* values = nullptr; size_t values_capacity = 0; - size_t values_size = 0; std::vector<u64> stored_bitset; std::vector<u32> free_list; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 59b7c678b..e7f8478b4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -20,8 +20,10 @@ #include "common/alignment.h" #include "common/common_funcs.h" +#include "common/common_sizes.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/dirty_flags.h" @@ -69,12 +71,17 @@ class TextureCache { static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; /// True when some copies have to be emulated static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + /// True when the API can provide info about the memory of the device. + static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; /// Image view ID for null descriptors static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; /// Sampler ID for bugged sampler ids static constexpr SamplerId NULL_SAMPLER_ID{0}; + static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB; + static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB; + using Runtime = typename P::Runtime; using Image = typename P::Image; using ImageAlloc = typename P::ImageAlloc; @@ -103,6 +110,9 @@ public: /// Notify the cache that a new frame has been queued void TickFrame(); + /// Runs the Garbage Collector. + void RunGarbageCollector(); + /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; @@ -333,6 +343,10 @@ private: std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; bool has_deleted_images = false; + u64 total_used_memory = 0; + u64 minimum_memory; + u64 expected_memory; + u64 critical_memory; SlotVector<Image> slot_images; SlotVector<ImageView> slot_image_views; @@ -353,6 +367,7 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; + typename SlotVector<Image>::Iterator deletion_iterator; }; template <class P> @@ -373,11 +388,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // This way the null resource becomes a compile time constant void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); + + deletion_iterator = slot_images.begin(); + + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + minimum_memory = 0; + } else { + // on OGL we can be more conservatives as the driver takes care. + expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB; + critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB; + minimum_memory = expected_memory; + } +} + +template <class P> +void TextureCache<P>::RunGarbageCollector() { + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_images.end()) { + deletion_iterator = slot_images.begin(); + if (deletion_iterator == slot_images.end()) { + break; + } + } + auto [image_id, image_tmp] = *deletion_iterator; + Image* image = image_tmp; // fix clang error. + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); + const u64 ticks_needed = + is_bad_overlap + ? ticks_to_destroy >> 4 + : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); + should_care |= aggressive_mode; + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = std::ranges::all_of( + image->overlapping_images, [&, image](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return overlap.frame_tick >= image->frame_tick; + }); + if (!overlap_check) { + ++deletion_iterator; + continue; + } + } + if (!is_bad_overlap && must_download) { + const bool alias_check = std::ranges::none_of( + image->aliased_images, [&, image](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick < image->frame_tick) || + (alias_image.modification_tick < image->modification_tick); + }); + + if (alias_check) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); + } + } + if (True(image->flags & ImageFlagBits::Tracked)) { + UntrackImage(*image); + } + UnregisterImage(image_id); + DeleteImage(image_id); + if (is_bad_overlap) { + ++num_iterations; + } + } + ++deletion_iterator; + } } template <class P> void TextureCache<P>::TickFrame() { - // Tick sentenced resources in this order to ensure they are destroyed in the right order + if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + RunGarbageCollector(); + } sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); @@ -568,17 +666,7 @@ template <class P> void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { std::vector<ImageId> images; ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { - // Skip images that were not modified from the GPU - if (False(image.flags & ImageFlagBits::GpuModified)) { - return; - } - // Skip images that .are. modified from the CPU - // We don't want to write sensitive data from the guest - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - if (image.info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + if (!image.IsSafeDownload()) { return; } image.flags &= ~ImageFlagBits::GpuModified; @@ -967,6 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector<ImageId> overlap_ids; std::vector<ImageId> left_aliased_ids; std::vector<ImageId> right_aliased_ids; + std::vector<ImageId> bad_overlap_ids; ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { if (info.type != overlap.info.type) { return; @@ -992,9 +1081,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, broken_views, native_bgr)) { right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; } }); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); @@ -1022,10 +1116,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId aliased_id : right_aliased_ids) { ImageBase& aliased = slot_images[aliased_id]; AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; } for (const ImageId aliased_id : left_aliased_ids) { ImageBase& aliased = slot_images[aliased_id]; AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; } RegisterImage(new_image_id); return new_image_id; @@ -1195,6 +1297,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { image.flags |= ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { page_table[page].push_back(image_id); }); + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); } template <class P> @@ -1203,6 +1312,14 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { @@ -1276,9 +1393,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) { std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { return other_alias.id == image_id; }); + other_image.CheckAliasState(); ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", num_removed_aliases); } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } for (const ImageViewId image_view_id : image_view_ids) { sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); slot_image_views.erase(image_view_id); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 6835fd747..4efe042b6 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span<const u8> src = input.subspan(host_offset); + gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, num_tiles.depth, block.height, block.depth); |