diff options
Diffstat (limited to 'src/video_core/texture_cache/texture_cache.h')
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 107 |
1 files changed, 92 insertions, 15 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72eeb8bbd..099b2ae1b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -50,14 +50,20 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_samplers.insert(runtime, sampler_descriptor)); if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 4) / 10; - const u64 possible_critical_memory = (device_memory * 7) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); - minimum_memory = 0; + const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); + const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; + const s64 min_spacing_critical = device_memory - 1_GiB; + const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); + const s64 min_vacancy_expected = (6 * mem_threshold) / 10; + const s64 min_vacancy_critical = (3 * mem_threshold) / 10; + expected_memory = static_cast<u64>( + std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), + DEFAULT_EXPECTED_MEMORY)); + critical_memory = static_cast<u64>( + std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), + DEFAULT_CRITICAL_MEMORY)); + minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); } else { - // On OpenGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; @@ -66,18 +72,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& template <class P> void TextureCache<P>::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; - size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); - const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + bool high_priority_mode = total_used_memory >= expected_memory; + bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; + size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); + const auto clean_up = [this, &num_iterations, &high_priority_mode, + &aggressive_mode](ImageId image_id) { if (num_iterations == 0) { return true; } --num_iterations; auto& image = slot_images[image_id]; - const bool must_download = image.IsSafeDownload(); - if (!high_priority_mode && must_download) { + const bool must_download = + image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); + if (!high_priority_mode && + (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) { return false; } if (must_download) { @@ -92,6 +101,18 @@ void TextureCache<P>::RunGarbageCollector() { } UnregisterImage(image_id); DeleteImage(image_id, image.scale_tick > frame_tick + 5); + if (total_used_memory < critical_memory) { + if (aggressive_mode) { + // Sink the aggresiveness. + num_iterations >>= 2; + aggressive_mode = false; + return false; + } + if (high_priority_mode && total_used_memory < expected_memory) { + num_iterations >>= 1; + high_priority_mode = false; + } + } return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); @@ -99,6 +120,10 @@ void TextureCache<P>::RunGarbageCollector() { template <class P> void TextureCache<P>::TickFrame() { + // If we can obtain the memory info, use it instead of the estimate. + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } if (total_used_memory > minimum_memory) { RunGarbageCollector(); } @@ -106,6 +131,7 @@ void TextureCache<P>::TickFrame() { sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); runtime.TickFrame(); + critical_gc = 0; ++frame_tick; } @@ -412,6 +438,23 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { } template <class P> +void TextureCache<P>::CachedWriteMemory(VAddr cpu_addr, size_t size) { + const VAddr new_cpu_addr = Common::AlignDown(cpu_addr, CPU_PAGE_SIZE); + const size_t new_size = Common::AlignUp(size + cpu_addr - new_cpu_addr, CPU_PAGE_SIZE); + ForEachImageInRegion(new_cpu_addr, new_size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CachedCpuModified)) { + return; + } + image.flags |= ImageFlagBits::CachedCpuModified; + cached_cpu_invalidate.insert(image_id); + + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + }); +} + +template <class P> void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { std::vector<ImageId> images; ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { @@ -469,6 +512,18 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { } template <class P> +void TextureCache<P>::FlushCachedWrites() { + for (ImageId image_id : cached_cpu_invalidate) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::CachedCpuModified)) { + image.flags &= ~ImageFlagBits::CachedCpuModified; + image.flags |= ImageFlagBits::CpuModified; + } + } + cached_cpu_invalidate.clear(); +} + +template <class P> void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy) { @@ -1052,6 +1107,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + new_image.flags |= ImageFlagBits::GpuModified; + new_image.modification_tick = + std::max(overlap.modification_tick, new_image.modification_tick); + } if (overlap.info.num_samples != new_image.info.num_samples) { LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { @@ -1414,6 +1474,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) { + RunGarbageCollector(); + critical_gc++; + } image.lru_index = lru_cache.Insert(image_id, frame_tick); ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, @@ -1525,6 +1589,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { template <class P> void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); + if (True(image.flags & ImageFlagBits::CachedCpuModified)) { + return; + } image.flags |= ImageFlagBits::Tracked; if (False(image.flags & ImageFlagBits::Sparse)) { rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); @@ -1581,6 +1648,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); + if (True(image.flags & ImageFlagBits::CachedCpuModified)) { + cached_cpu_invalidate.erase(image_id); + } const GPUVAddr gpu_addr = image.gpu_addr; const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it == image_allocs_table.end()) { @@ -1704,6 +1774,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); + if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { + image.flags |= ImageFlagBits::GpuModified; + } } } if (aliased_images.empty()) { @@ -1744,7 +1817,11 @@ template <class P> void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { Image& image = slot_images[image_id]; if (invalidate) { - image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (True(image.flags & ImageFlagBits::CachedCpuModified)) { + cached_cpu_invalidate.erase(image_id); + } + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified | + ImageFlagBits::CachedCpuModified); if (False(image.flags & ImageFlagBits::Tracked)) { TrackImage(image, image_id); } |