summaryrefslogtreecommitdiffstats
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
authorAmeer J <52414509+ameerj@users.noreply.github.com>2023-11-27 03:08:53 +0100
committerGitHub <noreply@github.com>2023-11-27 03:08:53 +0100
commit1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a (patch)
treec219aacab776c0a1e3956614b60a01fa2f6164cb /src/video_core/buffer_cache
parentshader_recompiler: Align SSBO offsets in GlobalMemory functions (diff)
parentMerge pull request #11535 from GPUCode/upload_cmdbuf (diff)
downloadyuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.gz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.bz2
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.lz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.xz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.zst
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.zip
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h102
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/buffer_cache/usage_tracker.h79
3 files changed, 114 insertions, 71 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2648970b6..6d1fc3887 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
if (!channel_state) {
return;
}
+ runtime.TickFrame(slot_buffers);
// Calculate hits and shots and move hit bits to the right
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
for (const IntervalType& add_interval : tmp_intervals) {
common_ranges.add(add_interval);
}
- runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+ const auto& copy = copies[0];
+ src_buffer.MarkUsage(copy.src_offset, copy.size);
+ dest_buffer.MarkUsage(copy.dst_offset, copy.size);
+ runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
}
@@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
common_ranges.subtract(subtract_interval);
const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
- auto& dest_buffer = slot_buffers[buffer];
+ Buffer& dest_buffer = slot_buffers[buffer];
const u32 offset = dest_buffer.Offset(*cpu_dst_address);
runtime.ClearBuffer(dest_buffer, offset, size, value);
+ dest_buffer.MarkUsage(offset, size);
return true;
}
@@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
async_downloads += std::make_pair(base_interval, 1);
+ buffer.MarkUsage(copy.src_offset, copy.size);
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
normalized_copies.push_back(second_copy);
}
@@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
// Have in mind the staging buffer offset for the copy
copy.dst_offset += download_staging.offset;
const std::array copies{copy};
- runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
- false);
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.MarkUsage(copy.src_offset, copy.size);
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
}
runtime.PostCopyBarrier();
runtime.Finish();
@@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
std::memcpy(upload_staging.mapped_span.data(),
draw_state.inline_index_draw_indexes.data(), size);
- runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
} else {
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
}
@@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
+ buffer.MarkUsage(offset, size);
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
draw_state.index_buffer.first, draw_state.index_buffer.count,
buffer, offset, size);
@@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
+ buffer.MarkUsage(offset, binding.size);
host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset);
@@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
}
+ buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
} else {
@@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
+ buffer.MarkUsage(offset, size);
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
if (is_written) {
@@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
+ buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
@@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
+ buffer.MarkUsage(offset, size);
host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset);
- host_bindings.sizes.push_back(binding.size);
+ host_bindings.sizes.push_back(size);
}
if (host_bindings.buffers.size() > 0) {
runtime.BindTransformFeedbackBuffers(host_bindings);
@@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
+ buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
++binding_index;
@@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
+ buffer.MarkUsage(offset, size);
const bool is_written =
((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
@@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
+ buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
@@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
}
+ const BufferId buffer_id = FindBuffer(*cpu_addr, size);
channel_state->vertex_buffers[index] = Binding{
.cpu_addr = *cpu_addr,
.size = size,
- .buffer_id = FindBuffer(*cpu_addr, size),
+ .buffer_id = buffer_id,
};
}
@@ -1192,11 +1209,6 @@ void BufferCache<P>::UpdateDrawIndirect() {
.size = static_cast<u32>(size),
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
};
- VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64);
- VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64);
- IntervalType interval{cpu_addr_start, cpu_addr_end};
- ClearDownload(interval);
- common_ranges.subtract(interval);
};
if (current_draw_indirect->include_count) {
update(current_draw_indirect->count_start_address, sizeof(u32),
@@ -1406,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
.dst_offset = dst_base_offset,
.size = overlap.SizeBytes(),
});
- runtime.CopyBuffer(new_buffer, overlap, copies);
+ new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
+ runtime.CopyBuffer(new_buffer, overlap, copies, true);
DeleteBuffer(overlap_id, true);
}
@@ -1419,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
auto& new_buffer = slot_buffers[new_buffer_id];
- runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
+ const size_t size_bytes = new_buffer.SizeBytes();
+ runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
+ new_buffer.MarkUsage(0, size_bytes);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@@ -1472,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
- return SynchronizeBufferImpl(buffer, cpu_addr, size);
-}
-
-template <class P>
-bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
@@ -1499,51 +1509,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
}
template <class P>
-bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
- boost::container::small_vector<BufferCopy, 4> copies;
- u64 total_size_bytes = 0;
- u64 largest_copy = 0;
- IntervalSet found_sets{};
- auto make_copies = [&] {
- for (auto& interval : found_sets) {
- const std::size_t sub_size = interval.upper() - interval.lower();
- const VAddr cpu_addr_ = interval.lower();
- copies.push_back(BufferCopy{
- .src_offset = total_size_bytes,
- .dst_offset = cpu_addr_ - buffer.CpuAddr(),
- .size = sub_size,
- });
- total_size_bytes += sub_size;
- largest_copy = std::max<u64>(largest_copy, sub_size);
- }
- const std::span<BufferCopy> copies_span(copies.data(), copies.size());
- UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
- };
- memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
- const VAddr base_adr = cpu_addr_out;
- const VAddr end_adr = base_adr + range_size;
- const IntervalType add_interval{base_adr, end_adr};
- found_sets.add(add_interval);
- });
- if (found_sets.empty()) {
- return true;
- }
- const IntervalType search_interval{cpu_addr, cpu_addr + size};
- auto it = common_ranges.lower_bound(search_interval);
- auto it_end = common_ranges.upper_bound(search_interval);
- if (it == common_ranges.end()) {
- make_copies();
- return false;
- }
- while (it != it_end) {
- found_sets.subtract(*it);
- it++;
- }
- make_copies();
- return false;
-}
-
-template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) {
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
@@ -1591,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
// Apply the staging offset
copy.src_offset += upload_staging.offset;
}
- runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+ const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
}
}
@@ -1633,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
}};
u8* const src_pointer = upload_staging.mapped_span.data();
std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
- runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+ const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
} else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
}
@@ -1686,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
for (BufferCopy& copy : copies) {
// Modify copies to have the staging offset in mind
copy.dst_offset += download_staging.offset;
+ buffer.MarkUsage(copy.src_offset, copy.size);
}
- runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
runtime.Finish();
for (const BufferCopy& copy : copies) {
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index eed267361..d6d696d8c 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -529,10 +529,6 @@ private:
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
- bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
-
- bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
-
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies);
diff --git a/src/video_core/buffer_cache/usage_tracker.h b/src/video_core/buffer_cache/usage_tracker.h
new file mode 100644
index 000000000..ab05fe415
--- /dev/null
+++ b/src/video_core/buffer_cache/usage_tracker.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+class UsageTracker {
+ static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
+ static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
+ static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
+
+public:
+ explicit UsageTracker(size_t size) {
+ const size_t num_pages = (size >> PAGE_SHIFT) + 1;
+ pages.resize(num_pages, 0ULL);
+ }
+
+ void Reset() noexcept {
+ std::ranges::fill(pages, 0ULL);
+ }
+
+ void Track(u64 offset, u64 size) noexcept {
+ const size_t page = offset >> PAGE_SHIFT;
+ const size_t page_end = (offset + size) >> PAGE_SHIFT;
+ TrackPage(page, offset, size);
+ if (page == page_end) {
+ return;
+ }
+ for (size_t i = page + 1; i < page_end; i++) {
+ pages[i] = ~u64{0};
+ }
+ const size_t offset_end = offset + size;
+ const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+ TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+ }
+
+ [[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
+ const size_t page = offset >> PAGE_SHIFT;
+ const size_t page_end = (offset + size) >> PAGE_SHIFT;
+ if (IsPageUsed(page, offset, size)) {
+ return true;
+ }
+ for (size_t i = page + 1; i < page_end; i++) {
+ if (pages[i] != 0) {
+ return true;
+ }
+ }
+ const size_t offset_end = offset + size;
+ const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+ return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+ }
+
+private:
+ void TrackPage(u64 page, u64 offset, u64 size) noexcept {
+ const size_t offset_in_page = offset % PAGE_BYTES;
+ const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+ const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+ const size_t mask = ~u64{0} >> (64 - num_bits);
+ pages[page] |= (~u64{0} & mask) << first_bit;
+ }
+
+ bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
+ const size_t offset_in_page = offset % PAGE_BYTES;
+ const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+ const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+ const size_t mask = ~u64{0} >> (64 - num_bits);
+ const size_t mask2 = (~u64{0} & mask) << first_bit;
+ return (pages[page] & mask2) != 0;
+ }
+
+private:
+ std::vector<u64> pages;
+};
+
+} // namespace VideoCommon