Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.cpp | 4
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 295
-rw-r--r-- src/video_core/buffer_cache/buffer_cache_base.h | 141
-rw-r--r-- src/video_core/host1x/codecs/codec.cpp | 93
-rw-r--r-- src/video_core/host1x/codecs/codec.h | 8
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 9
-rw-r--r-- src/video_core/texture_cache/image_base.cpp | 7
-rw-r--r-- src/video_core/texture_cache/image_base.h | 2
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 148
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h | 17
-rw-r--r-- src/video_core/texture_cache/util.cpp | 15
-rw-r--r-- src/video_core/vulkan_common/vulkan_device.cpp | 5
13 files changed, 460 insertions, 286 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 40db243d2..4b4f7061b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "common/microprofile.h" +#include "video_core/buffer_cache/buffer_cache_base.h" +#include "video_core/control/channel_state_cache.inc" namespace VideoCommon { @@ -9,4 +11,6 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12 MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128)); MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128)); +template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>; + } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 65494097b..427afd5fc 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -63,18 +63,27 @@ void BufferCache<P>::RunGarbageCollector() { template <class P> void BufferCache<P>::TickFrame() { + // Homebrew console apps don't create or bind any channels, so this will be nullptr. + if (!channel_state) { + return; + } + // Calculate hits and shots and move hit bits to the right - const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); - const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); - std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, - uniform_cache_hits.begin() + 1); - std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, - uniform_cache_shots.begin() + 1); - uniform_cache_hits[0] = 0; - uniform_cache_shots[0] = 0; + const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), + channel_state->uniform_cache_hits.end()); + const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(), + channel_state->uniform_cache_shots.end()); + std::copy_n(channel_state->uniform_cache_hits.begin(), + channel_state->uniform_cache_hits.size() - 1, + channel_state->uniform_cache_hits.begin() + 1); + std::copy_n(channel_state->uniform_cache_shots.begin(), + channel_state->uniform_cache_shots.size() - 1, + channel_state->uniform_cache_shots.begin() + 1); + channel_state->uniform_cache_hits[0] = 0; + channel_state->uniform_cache_shots[0] = 0; const bool skip_preferred = hits * 256 < shots * 251; - uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; // If we can obtain the memory info, use it instead of the estimate. 
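[Note: the TickFrame hunk above keeps a 16-slot rolling window of uniform-buffer cache hits versus attempts, now stored per channel. A minimal standalone model of that heuristic follows; UniformCacheStats is a hypothetical name, and the shift is written with copy_backward so the overlapping ranges are well defined.]

#include <algorithm>
#include <array>
#include <cstdint>
#include <numeric>

// Slot 0 counts the current frame; each TickFrame shifts the window right
// and re-evaluates the overall hit rate.
struct UniformCacheStats {
    std::array<std::uint32_t, 16> hits{};
    std::array<std::uint32_t, 16> shots{};

    // Returns the skip-cache size for the next frame (0 disables skipping).
    std::uint32_t Tick(std::uint32_t default_skip_size) {
        const std::uint32_t total_hits = std::reduce(hits.begin(), hits.end());
        const std::uint32_t total_shots = std::reduce(shots.begin(), shots.end());
        std::copy_backward(hits.begin(), hits.end() - 1, hits.end());
        std::copy_backward(shots.begin(), shots.end() - 1, shots.end());
        hits[0] = 0;
        shots[0] = 0;
        // Keep the small-buffer fast path while fewer than 251/256 (~98%) of
        // uniform-buffer synchronizations hit the cache.
        return total_hits * 256 < total_shots * 251 ? default_skip_size : 0;
    }
};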
if (runtime.CanReportMemoryUsage()) { @@ -164,10 +173,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am BufferId buffer_a; BufferId buffer_b; do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount)); buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount)); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); auto& src_buffer = slot_buffers[buffer_a]; auto& dest_buffer = slot_buffers[buffer_b]; SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); @@ -272,30 +281,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr .size = size, .buffer_id = BufferId{}, }; - uniform_buffers[stage][index] = binding; + channel_state->uniform_buffers[stage][index] = binding; } template <class P> void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) { - uniform_buffers[stage][index] = NULL_BINDING; + channel_state->uniform_buffers[stage][index] = NULL_BINDING; } template <class P> void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) { MICROPROFILE_SCOPE(GPU_PrepareBuffers); do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; DoUpdateGraphicsBuffers(is_indexed); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> void BufferCache<P>::UpdateComputeBuffers() { MICROPROFILE_SCOPE(GPU_PrepareBuffers); do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; DoUpdateComputeBuffers(); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> @@ -338,98 +347,102 @@ template <class P> void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffer_masks != mask) { + if (channel_state->enabled_uniform_buffer_masks != mask) { if constexpr (IS_OPENGL) { - fast_bound_uniform_buffers.fill(0); + channel_state->fast_bound_uniform_buffers.fill(0); } - dirty_uniform_buffers.fill(~u32{0}); - uniform_buffer_binding_sizes.fill({}); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->uniform_buffer_binding_sizes.fill({}); } } - enabled_uniform_buffer_masks = mask; - uniform_buffer_sizes = sizes; + channel_state->enabled_uniform_buffer_masks = mask; + channel_state->uniform_buffer_sizes = sizes; } template <class P> void BufferCache<P>::SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes) { - enabled_compute_uniform_buffer_mask = mask; - compute_uniform_buffer_sizes = sizes; + channel_state->enabled_compute_uniform_buffer_mask = mask; + channel_state->compute_uniform_buffer_sizes = sizes; } template <class P> void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { - enabled_storage_buffers[stage] = 0; - written_storage_buffers[stage] = 0; + channel_state->enabled_storage_buffers[stage] = 0; + channel_state->written_storage_buffers[stage] = 0; } template <class P> void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written) { - enabled_storage_buffers[stage] |= 1U << ssbo_index; - written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; + channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index; + channel_state->written_storage_buffers[stage] |= (is_written ? 
1U : 0U) << ssbo_index; const auto& cbufs = maxwell3d->state.shader_stages[stage]; const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; - storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); + channel_state->storage_buffers[stage][ssbo_index] = + StorageBufferBinding(ssbo_addr, cbuf_index, is_written); } template <class P> void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { - enabled_texture_buffers[stage] = 0; - written_texture_buffers[stage] = 0; - image_texture_buffers[stage] = 0; + channel_state->enabled_texture_buffers[stage] = 0; + channel_state->written_texture_buffers[stage] = 0; + channel_state->image_texture_buffers[stage] = 0; } template <class P> void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image) { - enabled_texture_buffers[stage] |= 1U << tbo_index; - written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index; + channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; } - texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); + channel_state->texture_buffers[stage][tbo_index] = + GetTextureBufferBinding(gpu_addr, size, format); } template <class P> void BufferCache<P>::UnbindComputeStorageBuffers() { - enabled_compute_storage_buffers = 0; - written_compute_storage_buffers = 0; - image_compute_texture_buffers = 0; + channel_state->enabled_compute_storage_buffers = 0; + channel_state->written_compute_storage_buffers = 0; + channel_state->image_compute_texture_buffers = 0; } template <class P> void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written) { - enabled_compute_storage_buffers |= 1U << ssbo_index; - written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; + channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index; + channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; const auto& launch_desc = kepler_compute->launch_description; ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); const auto& cbufs = launch_desc.const_buffer_config; const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset; - compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); + channel_state->compute_storage_buffers[ssbo_index] = + StorageBufferBinding(ssbo_addr, cbuf_index, is_written); } template <class P> void BufferCache<P>::UnbindComputeTextureBuffers() { - enabled_compute_texture_buffers = 0; - written_compute_texture_buffers = 0; - image_compute_texture_buffers = 0; + channel_state->enabled_compute_texture_buffers = 0; + channel_state->written_compute_texture_buffers = 0; + channel_state->image_compute_texture_buffers = 0; } template <class P> void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image) { - enabled_compute_texture_buffers |= 1U << tbo_index; - written_compute_texture_buffers |= (is_written ? 
1U : 0U) << tbo_index; + channel_state->enabled_compute_texture_buffers |= 1U << tbo_index; + channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; } - compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); + channel_state->compute_texture_buffers[tbo_index] = + GetTextureBufferBinding(gpu_addr, size, format); } template <class P> @@ -672,10 +685,10 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { template <class P> void BufferCache<P>::BindHostIndexBuffer() { - Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer, index_buffer.buffer_id); - const u32 offset = buffer.Offset(index_buffer.cpu_addr); - const u32 size = index_buffer.size; + Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; + TouchBuffer(buffer, channel_state->index_buffer.buffer_id); + const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); + const u32 size = channel_state->index_buffer.size; const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { if constexpr (USE_MEMORY_MAPS) { @@ -689,7 +702,7 @@ void BufferCache<P>::BindHostIndexBuffer() { buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); } } else { - SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); + SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); } if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { const u32 new_offset = @@ -706,7 +719,7 @@ template <class P> void BufferCache<P>::BindHostVertexBuffers() { auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - const Binding& binding = vertex_buffers[index]; + const Binding& binding = channel_state->vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); @@ -729,19 +742,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() { SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); }; if (current_draw_indirect->include_count) { - bind_buffer(count_buffer_binding); + bind_buffer(channel_state->count_buffer_binding); } - bind_buffer(indirect_buffer_binding); + bind_buffer(channel_state->indirect_buffer_binding); } template <class P> void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { u32 dirty = ~0U; if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty = std::exchange(dirty_uniform_buffers[stage], 0); + dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { + ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -753,13 +766,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind) { - const Binding& binding = uniform_buffers[stage][index]; + const Binding& binding = channel_state->uniform_buffers[stage][index]; const 
VAddr cpu_addr = binding.cpu_addr; - const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); + const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && - size <= uniform_buffer_skip_cache_size && + size <= channel_state->uniform_buffer_skip_cache_size && !memory_tracker.IsRegionGpuModified(cpu_addr, size); if (use_fast_buffer) { if constexpr (IS_OPENGL) { @@ -767,11 +780,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia const bool should_fast_bind = !HasFastUniformBufferBound(stage, binding_index) || - uniform_buffer_binding_sizes[stage][binding_index] != size; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size; if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -780,8 +793,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } } if constexpr (IS_OPENGL) { - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); @@ -791,15 +804,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Classic cached path const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); if (sync_cached) { - ++uniform_cache_hits[0]; + ++channel_state->uniform_cache_hits[0]; } - ++uniform_cache_shots[0]; + ++channel_state->uniform_cache_shots[0]; // Skip binding if it's not needed and if the bound buffer is not the fast version // This exists to avoid instances where the fast buffer is bound and a GPU write happens needs_bind |= HasFastUniformBufferBound(stage, binding_index); if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size; } if (!needs_bind) { return; @@ -807,14 +820,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const u32 offset = buffer.Offset(cpu_addr); if constexpr (IS_OPENGL) { // Fast buffer will be unbound - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); // Mark the index as dirty if offset doesn't match const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); - dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 
1U : 0U) << index; } if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); @@ -826,15 +839,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 template <class P> void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { u32 binding_index = 0; - ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { - const Binding& binding = storage_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { + const Binding& binding = channel_state->storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0; + const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); ++binding_index; @@ -846,8 +859,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { template <class P> void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { - ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { - const TextureBufferBinding& binding = texture_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -855,7 +868,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - if (((image_texture_buffers[stage] >> index) & 1) != 0) { + if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); } else { runtime.BindTextureBuffer(buffer, offset, size, format); @@ -872,7 +885,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { return; } for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { - const Binding& binding = transform_feedback_buffers[index]; + const Binding& binding = channel_state->transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; @@ -887,15 +900,16 @@ template <class P> void BufferCache<P>::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty - dirty_uniform_buffers.fill(~u32{0}); - fast_bound_uniform_buffers.fill(0); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { - const Binding& binding = compute_uniform_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) { + const Binding& binding = 
channel_state->compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); - const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); + const u32 size = + std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -911,15 +925,16 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { template <class P> void BufferCache<P>::BindHostComputeStorageBuffers() { u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { - const Binding& binding = compute_storage_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { + const Binding& binding = channel_state->compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0; + const bool is_written = + ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); ++binding_index; @@ -931,8 +946,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { template <class P> void BufferCache<P>::BindHostComputeTextureBuffers() { - ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { - const TextureBufferBinding& binding = compute_texture_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -940,7 +955,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - if (((image_compute_texture_buffers >> index) & 1) != 0) { + if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); } else { runtime.BindTextureBuffer(buffer, offset, size, format); @@ -954,7 +969,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { template <class P> void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; if (is_indexed) { UpdateIndexBuffer(); } @@ -968,7 +983,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { if (current_draw_indirect) { UpdateDrawIndirect(); } - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> @@ -999,7 +1014,7 @@ void BufferCache<P>::UpdateIndexBuffer() { slot_buffers.erase(inline_buffer_id); inline_buffer_id = CreateBuffer(0, buffer_size); } - index_buffer = Binding{ + channel_state->index_buffer = Binding{ .cpu_addr = 0, .size = inline_index_size, .buffer_id = inline_buffer_id, @@ -1015,10 +1030,10 @@ void BufferCache<P>::UpdateIndexBuffer() { (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); const u32 size = std::min(address_size, draw_size); if (size == 0 || !cpu_addr) { 
- index_buffer = NULL_BINDING; + channel_state->index_buffer = NULL_BINDING; return; } - index_buffer = Binding{ + channel_state->index_buffer = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = FindBuffer(*cpu_addr, size), @@ -1051,13 +1066,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); u32 size = address_size; // TODO: Analyze stride and number of vertices if (array.enable == 0 || size == 0 || !cpu_addr) { - vertex_buffers[index] = NULL_BINDING; + channel_state->vertex_buffers[index] = NULL_BINDING; return; } if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); } - vertex_buffers[index] = Binding{ + channel_state->vertex_buffers[index] = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = FindBuffer(*cpu_addr, size), @@ -1079,23 +1094,24 @@ void BufferCache<P>::UpdateDrawIndirect() { }; }; if (current_draw_indirect->include_count) { - update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding); + update(current_draw_indirect->count_start_address, sizeof(u32), + channel_state->count_buffer_binding); } update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size, - indirect_buffer_binding); + channel_state->indirect_buffer_binding); } template <class P> void BufferCache<P>::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { - Binding& binding = uniform_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) { + Binding& binding = channel_state->uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated return; } // Mark as dirty if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty_uniform_buffers[stage] |= 1U << index; + channel_state->dirty_uniform_buffers[stage] |= 1U << index; } // Resolve buffer binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); @@ -1104,10 +1120,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateStorageBuffers(size_t stage) { - const u32 written_mask = written_storage_buffers[stage]; - ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { + const u32 written_mask = channel_state->written_storage_buffers[stage]; + ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { // Resolve buffer - Binding& binding = storage_buffers[stage][index]; + Binding& binding = channel_state->storage_buffers[stage][index]; const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); binding.buffer_id = buffer_id; // Mark buffer as written if needed @@ -1119,11 +1135,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateTextureBuffers(size_t stage) { - ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { - Binding& binding = texture_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = channel_state->texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark buffer as written if needed - if (((written_texture_buffers[stage] >> index) & 1) != 0) { + if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1146,11 
+1162,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { const u32 size = binding.size; const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (binding.enable == 0 || size == 0 || !cpu_addr) { - transform_feedback_buffers[index] = NULL_BINDING; + channel_state->transform_feedback_buffers[index] = NULL_BINDING; return; } const BufferId buffer_id = FindBuffer(*cpu_addr, size); - transform_feedback_buffers[index] = Binding{ + channel_state->transform_feedback_buffers[index] = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = buffer_id, @@ -1160,8 +1176,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { - Binding& binding = compute_uniform_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) { + Binding& binding = channel_state->compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute->launch_description; if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { @@ -1178,12 +1194,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() { template <class P> void BufferCache<P>::UpdateComputeStorageBuffers() { - ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { + ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer - Binding& binding = compute_storage_buffers[index]; + Binding& binding = channel_state->compute_storage_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed - if (((written_compute_storage_buffers >> index) & 1) != 0) { + if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1191,11 +1207,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { template <class P> void BufferCache<P>::UpdateComputeTextureBuffers() { - ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { - Binding& binding = compute_texture_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = channel_state->compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed - if (((written_compute_texture_buffers >> index) & 1) != 0) { + if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1610,13 +1626,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { const auto replace = [scalar_replace](std::span<Binding> bindings) { std::ranges::for_each(bindings, scalar_replace); }; - scalar_replace(index_buffer); - replace(vertex_buffers); - std::ranges::for_each(uniform_buffers, replace); - std::ranges::for_each(storage_buffers, replace); - replace(transform_feedback_buffers); - replace(compute_uniform_buffers); - replace(compute_storage_buffers); + scalar_replace(channel_state->index_buffer); + replace(channel_state->vertex_buffers); + std::ranges::for_each(channel_state->uniform_buffers, replace); + std::ranges::for_each(channel_state->storage_buffers, replace); + replace(channel_state->transform_feedback_buffers); + replace(channel_state->compute_uniform_buffers); + replace(channel_state->compute_storage_buffers); // Mark the 
whole buffer as CPU written to stop tracking CPU writes if (!do_not_mark) { @@ -1634,8 +1650,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { template <class P> void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty_uniform_buffers.fill(~u32{0}); - uniform_buffer_binding_sizes.fill({}); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d->dirty.flags; flags[Dirty::IndexBuffer] = true; @@ -1643,13 +1659,12 @@ void BufferCache<P>::NotifyBufferDeletion() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { flags[Dirty::VertexBuffer0 + index] = true; } - has_deleted_buffers = true; + channel_state->has_deleted_buffers = true; } template <class P> -typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, - u32 cbuf_index, - bool is_written) const { +Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, + bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); const auto size = [&]() { const bool is_nvn_cbuf = cbuf_index == 0; @@ -1681,8 +1696,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s } template <class P> -typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( - GPUVAddr gpu_addr, u32 size, PixelFormat format) { +TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format) { const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); TextureBufferBinding binding; if (!cpu_addr || size == 0) { @@ -1721,7 +1736,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { template <class P> bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { if constexpr (IS_OPENGL) { - return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; + return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; } else { // Only OpenGL has fast uniform buffers return false; @@ -1730,14 +1745,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) template <class P> std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { - auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; - return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); + auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); } template <class P> std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { - auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; - return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); + auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); } } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index ac00d4d9d..c689fe06b 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -86,8 +86,78 @@ enum class ObtainBufferOperation : u32 { MarkQuery = 3, }; -template <typename P> -class BufferCache : 
public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { +static constexpr BufferId NULL_BUFFER_ID{0}; +static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); + +struct Binding { + VAddr cpu_addr{}; + u32 size{}; + BufferId buffer_id; +}; + +struct TextureBufferBinding : Binding { + PixelFormat format; +}; + +static constexpr Binding NULL_BINDING{ + .cpu_addr = 0, + .size = 0, + .buffer_id = NULL_BUFFER_ID, +}; + +class BufferCacheChannelInfo : public ChannelInfo { +public: + BufferCacheChannelInfo() = delete; + BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {} + BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete; + BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete; + + Binding index_buffer; + std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; + std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; + std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; + std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; + std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; + Binding count_buffer_binding; + Binding indirect_buffer_binding; + + std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; + std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; + std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; + + std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; + + std::array<u32, NUM_STAGES> enabled_storage_buffers{}; + std::array<u32, NUM_STAGES> written_storage_buffers{}; + u32 enabled_compute_storage_buffers = 0; + u32 written_compute_storage_buffers = 0; + + std::array<u32, NUM_STAGES> enabled_texture_buffers{}; + std::array<u32, NUM_STAGES> written_texture_buffers{}; + std::array<u32, NUM_STAGES> image_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; + + std::array<u32, 16> uniform_cache_hits{}; + std::array<u32, 16> uniform_cache_shots{}; + + u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; + + bool has_deleted_buffers = false; + + std::array<u32, NUM_STAGES> dirty_uniform_buffers{}; + std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> + uniform_buffer_binding_sizes{}; +}; + +template <class P> +class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> { // Page size for caching purposes. // This is unrelated to the CPU page size and it can be changed as it seems optimal. 
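[Note: BufferCacheChannelInfo above moves every per-channel binding table out of BufferCache itself; the cache then reaches all of it through the channel_state pointer that ChannelSetupCaches maintains, and guards it for null since homebrew titles never bind a channel. A simplified sketch of that pattern, with a hypothetical int channel id standing in for Tegra::Control::ChannelState (the real base lives behind the channel_state_cache.inc include added in the first hunk).]

#include <memory>
#include <unordered_map>

struct ChannelInfo {
    explicit ChannelInfo(int channel_id_) : channel_id{channel_id_} {}
    int channel_id;
};

template <class Info>
class ChannelSetupCaches {
public:
    void CreateChannel(int channel_id) {
        channels.emplace(channel_id, std::make_unique<Info>(channel_id));
    }
    void BindToChannel(int channel_id) {
        const auto it = channels.find(channel_id);
        // Clients that never created a channel leave this null, hence the
        // early return in TickFrame.
        channel_state = it != channels.end() ? it->second.get() : nullptr;
    }

protected:
    Info* channel_state = nullptr;

private:
    std::unordered_map<int, std::unique_ptr<Info>> channels;
};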
static constexpr u32 CACHING_PAGEBITS = 16; @@ -104,8 +174,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; - static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; static constexpr s64 TARGET_THRESHOLD = 4_GiB; @@ -149,8 +217,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI using OverlapSection = boost::icl::inter_section<int>; using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; - struct Empty {}; - struct OverlapResult { std::vector<BufferId> ids; VAddr begin; @@ -158,25 +224,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI bool has_stream_leap = false; }; - struct Binding { - VAddr cpu_addr{}; - u32 size{}; - BufferId buffer_id; - }; - - struct TextureBufferBinding : Binding { - PixelFormat format; - }; - - static constexpr Binding NULL_BINDING{ - .cpu_addr = 0, - .size = 0, - .buffer_id = NULL_BUFFER_ID, - }; - public: - static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); - explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_); @@ -496,51 +544,6 @@ private: u32 last_index_count = 0; - Binding index_buffer; - std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; - std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; - std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; - std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; - std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; - Binding count_buffer_binding; - Binding indirect_buffer_binding; - - std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; - std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; - std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; - - std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; - u32 enabled_compute_uniform_buffer_mask = 0; - - const UniformBufferSizes* uniform_buffer_sizes{}; - const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; - - std::array<u32, NUM_STAGES> enabled_storage_buffers{}; - std::array<u32, NUM_STAGES> written_storage_buffers{}; - u32 enabled_compute_storage_buffers = 0; - u32 written_compute_storage_buffers = 0; - - std::array<u32, NUM_STAGES> enabled_texture_buffers{}; - std::array<u32, NUM_STAGES> written_texture_buffers{}; - std::array<u32, NUM_STAGES> image_texture_buffers{}; - u32 enabled_compute_texture_buffers = 0; - u32 written_compute_texture_buffers = 0; - u32 image_compute_texture_buffers = 0; - - std::array<u32, 16> uniform_cache_hits{}; - std::array<u32, 16> uniform_cache_shots{}; - - u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; - - bool has_deleted_buffers = false; - - std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> - dirty_uniform_buffers{}; - std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; - std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, - std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> - uniform_buffer_binding_sizes{}; - 
MemoryTracker memory_tracker; IntervalSet uncommitted_ranges; IntervalSet common_ranges; diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 3e9022dce..cd6a3a9b8 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -5,6 +5,7 @@ #include <fstream> #include <vector> #include "common/assert.h" +#include "common/scope_exit.h" #include "common/settings.h" #include "video_core/host1x/codecs/codec.h" #include "video_core/host1x/codecs/h264.h" @@ -14,6 +15,8 @@ #include "video_core/memory_manager.h" extern "C" { +#include <libavfilter/buffersink.h> +#include <libavfilter/buffersrc.h> #include <libavutil/opt.h> #ifdef LIBVA_FOUND // for querying VAAPI driver information @@ -85,6 +88,10 @@ Codec::~Codec() { // Free libav memory avcodec_free_context(&av_codec_ctx); av_buffer_unref(&av_gpu_decoder); + + if (filters_initialized) { + avfilter_graph_free(&av_filter_graph); + } } bool Codec::CreateGpuAvDevice() { @@ -167,6 +174,62 @@ void Codec::InitializeGpuDecoder() { av_codec_ctx->get_format = GetGpuFormat; } +void Codec::InitializeAvFilters(AVFrame* frame) { + const AVFilter* buffer_src = avfilter_get_by_name("buffer"); + const AVFilter* buffer_sink = avfilter_get_by_name("buffersink"); + AVFilterInOut* inputs = avfilter_inout_alloc(); + AVFilterInOut* outputs = avfilter_inout_alloc(); + SCOPE_EXIT({ + avfilter_inout_free(&inputs); + avfilter_inout_free(&outputs); + }); + + // Don't know how to get the accurate time_base but it doesn't matter for yadif filter + // so just use 1/1 to make buffer filter happy + std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width, + frame->height, frame->format); + + av_filter_graph = avfilter_graph_alloc(); + int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(), + nullptr, av_filter_graph); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret); + return; + } + + ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr, + av_filter_graph); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret); + return; + } + + inputs->name = av_strdup("out"); + inputs->filter_ctx = av_filter_sink_ctx; + inputs->pad_idx = 0; + inputs->next = nullptr; + + outputs->name = av_strdup("in"); + outputs->filter_ctx = av_filter_src_ctx; + outputs->pad_idx = 0; + outputs->next = nullptr; + + const char* description = "yadif=1:-1:0"; + ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret); + return; + } + + ret = avfilter_graph_config(av_filter_graph, nullptr); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret); + return; + } + + filters_initialized = true; +} + void Codec::Initialize() { const AVCodecID codec = [&] { switch (current_codec) { @@ -271,8 +334,34 @@ void Codec::Decode() { UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(std::move(final_frame)); - if (av_frames.size() > 10) { + if (!final_frame->interlaced_frame) { + av_frames.push(std::move(final_frame)); + } else { + if (!filters_initialized) { + InitializeAvFilters(final_frame.get()); + } + if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(), + AV_BUFFERSRC_FLAG_KEEP_REF); + ret) { + LOG_DEBUG(Service_NVDRV, 
"av_buffersrc_add_frame_flags error {}", ret); + return; + } + while (true) { + auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + + int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get()); + + if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) + break; + if (ret < 0) { + LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret); + return; + } + + av_frames.push(std::move(filter_frame)); + } + } + while (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); } diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index 0d45fb7fe..06fe00a4b 100644 --- a/src/video_core/host1x/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h @@ -15,6 +15,7 @@ extern "C" { #pragma GCC diagnostic ignored "-Wconversion" #endif #include <libavcodec/avcodec.h> +#include <libavfilter/avfilter.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif @@ -61,17 +62,24 @@ public: private: void InitializeAvCodecContext(); + void InitializeAvFilters(AVFrame* frame); + void InitializeGpuDecoder(); bool CreateGpuAvDevice(); bool initialized{}; + bool filters_initialized{}; Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; const AVCodec* av_codec{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; AVBufferRef* av_gpu_decoder{nullptr}; + AVFilterContext* av_filter_src_ctx{nullptr}; + AVFilterContext* av_filter_sink_ctx{nullptr}; + AVFilterGraph* av_filter_graph{nullptr}; + Host1x::Host1x& host1x; const Host1x::NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6af4ae793..6d3bda192 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -117,7 +117,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) for (auto& stage_uniforms : fast_uniforms) { for (OGLBuffer& buffer : stage_uniforms) { buffer.Create(); - glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr, + glNamedBufferData(buffer.handle, VideoCommon::DEFAULT_SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 1e0823836..56d0ff869 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -439,6 +439,11 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return GL_R32UI; } +[[nodiscard]] bool IsAstcRecompressionEnabled() { + return Settings::values.astc_recompression.GetValue() != + Settings::AstcRecompression::Uncompressed; +} + [[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) { switch (Settings::values.astc_recompression.GetValue()) { case Settings::AstcRecompression::Bc1: @@ -760,7 +765,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { gl_internal_format = SelectAstcFormat(info.format, is_srgb); gl_format = GL_NONE; } @@ -1155,7 +1160,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI const bool is_srgb = 
IsPixelFormatSRGB(info.format); internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { internal_format = SelectAstcFormat(info.format, is_srgb); } } else { diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 91512022f..d79594ce5 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -155,7 +155,7 @@ void ImageBase::CheckAliasState() { flags &= ~ImageFlagBits::Alias; } -void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { +bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; ASSERT(lhs.info.type == rhs.info.type); std::optional<SubresourceBase> base; @@ -169,7 +169,7 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i } if (!base) { LOG_ERROR(HW_GPU, "Image alias should have been flipped"); - return; + return false; } const PixelFormat lhs_format = lhs.info.format; const PixelFormat rhs_format = rhs.info.format; @@ -248,12 +248,13 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i } ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); if (lhs_alias.copies.empty()) { - return; + return false; } lhs.aliased_images.push_back(std::move(lhs_alias)); rhs.aliased_images.push_back(std::move(rhs_alias)); lhs.flags &= ~ImageFlagBits::IsRescalable; rhs.flags &= ~ImageFlagBits::IsRescalable; + return true; } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 329396bb6..1b8a17ee8 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -142,6 +142,6 @@ struct ImageAllocBase { std::vector<ImageId> images; }; -void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); +bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9790949f5..2cf082c5d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -139,7 +139,6 @@ void TextureCache<P>::TickFrame() { TickAsyncDecode(); runtime.TickFrame(); - critical_gc = 0; ++frame_tick; if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { @@ -1312,17 +1311,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const size_t size_bytes = CalculateGuestSizeInBytes(new_info); const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); - boost::container::small_vector<ImageId, 4> overlap_ids; - std::unordered_set<ImageId> overlaps_found; - boost::container::small_vector<ImageId, 4> left_aliased_ids; - boost::container::small_vector<ImageId, 4> right_aliased_ids; - std::unordered_set<ImageId> ignore_textures; - boost::container::small_vector<ImageId, 4> bad_overlap_ids; - boost::container::small_vector<ImageId, 4> all_siblings; + join_overlap_ids.clear(); + join_overlaps_found.clear(); + join_left_aliased_ids.clear(); + join_right_aliased_ids.clear(); + join_ignore_textures.clear(); + join_bad_overlap_ids.clear(); + join_copies_to_do.clear(); + join_alias_indices.clear(); const bool this_is_linear 
= info.type == ImageType::Linear; const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); + join_ignore_textures.insert(overlap_id); return; } const bool overlap_is_linear = overlap.info.type == ImageType::Linear; @@ -1332,11 +1332,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (this_is_linear && overlap_is_linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch - left_aliased_ids.push_back(overlap_id); + join_left_aliased_ids.push_back(overlap_id); } return; } - overlaps_found.insert(overlap_id); + join_overlaps_found.insert(overlap_id); static constexpr bool strict_size = true; const std::optional<OverlapResult> solution = ResolveOverlap( new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); @@ -1344,33 +1344,33 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA gpu_addr = solution->gpu_addr; cpu_addr = solution->cpu_addr; new_info.resources = solution->resources; - overlap_ids.push_back(overlap_id); - all_siblings.push_back(overlap_id); + join_overlap_ids.push_back(overlap_id); + join_copies_to_do.emplace_back(JoinCopy{false, overlap_id}); return; } static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { - left_aliased_ids.push_back(overlap_id); + join_left_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; - all_siblings.push_back(overlap_id); + join_copies_to_do.emplace_back(JoinCopy{true, overlap_id}); } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, broken_views, native_bgr)) { - right_aliased_ids.push_back(overlap_id); + join_right_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; - all_siblings.push_back(overlap_id); + join_copies_to_do.emplace_back(JoinCopy{true, overlap_id}); } else { - bad_overlap_ids.push_back(overlap_id); + join_bad_overlap_ids.push_back(overlap_id); } }; ForEachImageInRegion(cpu_addr, size_bytes, region_check); const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { - if (!overlaps_found.contains(overlap_id)) { + if (!join_overlaps_found.contains(overlap_id)) { if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); + join_ignore_textures.insert(overlap_id); } if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { - ignore_textures.insert(overlap_id); + join_ignore_textures.insert(overlap_id); } } }; @@ -1378,11 +1378,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bool can_rescale = info.rescaleable; bool any_rescaled = false; - for (const ImageId sibling_id : all_siblings) { + for (const auto& copy : join_copies_to_do) { if (!can_rescale) { break; } - Image& sibling = slot_images[sibling_id]; + Image& sibling = slot_images[copy.id]; can_rescale &= ImageCanRescale(sibling); any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); } @@ -1390,13 +1390,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA can_rescale &= any_rescaled; if (can_rescale) { - for (const ImageId sibling_id : all_siblings) { - Image& sibling = slot_images[sibling_id]; + for (const auto& copy : join_copies_to_do) { 
+ Image& sibling = slot_images[copy.id]; ScaleUp(sibling); } } else { - for (const ImageId sibling_id : all_siblings) { - Image& sibling = slot_images[sibling_id]; + for (const auto& copy : join_copies_to_do) { + Image& sibling = slot_images[copy.id]; ScaleDown(sibling); } } @@ -1408,7 +1408,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA new_image.flags |= ImageFlagBits::Sparse; } - for (const ImageId overlap_id : ignore_textures) { + for (const ImageId overlap_id : join_ignore_textures) { Image& overlap = slot_images[overlap_id]; if (True(overlap.flags & ImageFlagBits::GpuModified)) { UNIMPLEMENTED(); @@ -1429,14 +1429,60 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA ScaleDown(new_image); } - std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) { - const ImageBase& lhs_image = slot_images[lhs]; - const ImageBase& rhs_image = slot_images[rhs]; + std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) { + const ImageBase& lhs_image = slot_images[lhs.id]; + const ImageBase& rhs_image = slot_images[rhs.id]; return lhs_image.modification_tick < rhs_image.modification_tick; }); - for (const ImageId overlap_id : overlap_ids) { - Image& overlap = slot_images[overlap_id]; + ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : join_right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + size_t alias_index = new_image_base.aliased_images.size(); + if (!AddImageAlias(new_image_base, aliased, new_image_id, aliased_id)) { + continue; + } + join_alias_indices.emplace(aliased_id, alias_index); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : join_left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + size_t alias_index = new_image_base.aliased_images.size(); + if (!AddImageAlias(aliased, new_image_base, aliased_id, new_image_id)) { + continue; + } + join_alias_indices.emplace(aliased_id, alias_index); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : join_bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + if (aliased.info.resources.levels == 1 && aliased.info.block.depth == 0 && + aliased.overlapping_images.size() > 1) { + aliased.flags |= ImageFlagBits::BadOverlap; + } + if (new_image.info.resources.levels == 1 && new_image.info.block.depth == 0 && + new_image.overlapping_images.size() > 1) { + new_image.flags |= ImageFlagBits::BadOverlap; + } + } + + for (const auto& copy_object : join_copies_to_do) { + Image& overlap = slot_images[copy_object.id]; + if (copy_object.is_alias) { + if (!overlap.IsSafeDownload()) { + continue; + } + const auto alias_pointer = join_alias_indices.find(copy_object.id); + if (alias_pointer == join_alias_indices.end()) { + continue; + } + const AliasedImage& aliased = new_image.aliased_images[alias_pointer->second]; + CopyImage(new_image_id, aliased.id, aliased.copies); + new_image.modification_tick = overlap.modification_tick; + continue; + } if (True(overlap.flags & ImageFlagBits::GpuModified)) { new_image.flags |= ImageFlagBits::GpuModified; const auto& resolution = Settings::values.resolution_info; @@ -1449,35 +1495,15 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } else { runtime.CopyImage(new_image, overlap, std::move(copies)); } + new_image.modification_tick = 
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 1463f157b..95a5b47d8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -123,7 +123,9 @@ template <u32 GOB_EXTENT>
     return {
         .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
         .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
-        .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
+        .depth = level == 0
+                     ? block_size.depth
+                     : AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
     };
 }
 
@@ -165,6 +167,13 @@ template <u32 GOB_EXTENT>
 }
 
 [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
+    if (level == 0) {
+        return Extent3D{
+            .width = info.block.width,
+            .height = info.block.height,
+            .depth = info.block.depth,
+        };
+    }
     const Extent3D blocks = NumLevelBlocks(info, level);
     return Extent3D{
         .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
@@ -1288,7 +1297,9 @@ u32 MapSizeBytes(const ImageBase& image) {
 static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
               0x7f8000);
 
-static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
+static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000);
+
+static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000);
 
 static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
               0x2afc00);
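The corrected assertion follows from the LevelInfo fields: with bpp_log2 = 4 (16 bytes per texel), the 32x32 base level is 512 bytes wide (8 GOBs of 64 bytes), stands 4 GOB rows high, and its block depth of 2^4 = 16 GOBs pads the single depth slice to a full block, giving 8 * 4 * 16 GOBs of 512 bytes = 0x40000, not 0x4000. A simplified recomputation under those assumptions; Level0Size is an illustrative stand-in that omits the block rounding CalculateLevelSize performs, which these dimensions do not need:

    #include <cstdint>

    // One GOB is 64 bytes x 8 rows.
    constexpr std::uint32_t GOB_BYTES = 512;

    constexpr std::uint32_t Level0Size(std::uint32_t width, std::uint32_t height,
                                       std::uint32_t bpp_log2,
                                       std::uint32_t block_depth_log2) {
        const std::uint32_t gobs_x = (width << bpp_log2) / 64; // 512 B wide -> 8
        const std::uint32_t gobs_y = height / 8;               // 32 rows -> 4
        const std::uint32_t gobs_z = 1u << block_depth_log2;   // depth 1 pads to 16
        return gobs_x * gobs_y * gobs_z * GOB_BYTES;
    }

    static_assert(Level0Size(32, 32, 4, 4) == 0x40000); // matches the fixed assert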
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3a7c2dedf..aea677cb3 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -473,11 +473,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     }
     if (extensions.push_descriptor && is_intel_anv) {
         const u32 version = (properties.properties.driverVersion << 3) >> 3;
-        if (version >= VK_MAKE_API_VERSION(0, 22, 3, 0)) {
+        if (version >= VK_MAKE_API_VERSION(0, 22, 3, 0) &&
+            version < VK_MAKE_API_VERSION(0, 23, 2, 0)) {
             // Disable VK_KHR_push_descriptor due to
             // mesa/mesa/-/commit/ff91c5ca42bc80aa411cb3fd8f550aa6fdd16bdc
             LOG_WARNING(Render_Vulkan,
-                        "ANV drivers 22.3.0 and later have broken VK_KHR_push_descriptor");
+                        "ANV drivers 22.3.0 to 23.1.0 have broken VK_KHR_push_descriptor");
             extensions.push_descriptor = false;
             loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
         }
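This version gate works because Mesa reports driverVersion in the VK_MAKE_API_VERSION layout (3 variant bits, then 7 major, 10 minor, and 12 patch bits), so shifting left then right by three clears the variant bits before the range comparison. A standalone sketch of the check, with MakeVersion and HasBrokenPushDescriptor as illustrative stand-ins:

    #include <cstdint>
    #include <cstdio>

    // Same bit layout as VK_MAKE_API_VERSION(0, major, minor, patch).
    constexpr std::uint32_t MakeVersion(std::uint32_t major, std::uint32_t minor,
                                        std::uint32_t patch) {
        return (major << 22) | (minor << 12) | patch;
    }

    constexpr bool HasBrokenPushDescriptor(std::uint32_t driver_version) {
        const std::uint32_t version = (driver_version << 3) >> 3; // drop variant bits
        return version >= MakeVersion(22, 3, 0) && version < MakeVersion(23, 2, 0);
    }

    static_assert(HasBrokenPushDescriptor(MakeVersion(22, 3, 0)));  // first broken
    static_assert(HasBrokenPushDescriptor(MakeVersion(23, 1, 9)));  // still broken
    static_assert(!HasBrokenPushDescriptor(MakeVersion(23, 2, 0))); // fixed again
    static_assert(!HasBrokenPushDescriptor(MakeVersion(22, 2, 7))); // predates bug

    int main() {
        std::printf("22.3.0 broken: %d\n",
                    HasBrokenPushDescriptor(MakeVersion(22, 3, 0)));
    }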