summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2023-04-14 18:07:38 +0200
committerFernando Sahmkow <fsahmkow27@gmail.com>2023-04-29 00:18:21 +0200
commite3a2ca96bd2350471ebb6c2907c67b10254a4f7e (patch)
tree5238364cdea97449adb0766df0d6263123ed06da /src
parentTextureCache: refactor DMA downloads to allow multiple buffers. (diff)
downloadyuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.gz
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.bz2
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.lz
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.xz
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.zst
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.zip
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h118
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h23
6 files changed, 123 insertions, 53 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2de533584..4993d4709 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
- const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
- : VideoCommon::ObtainBufferOperation::MarkAsWritten;
+ const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
@@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
if constexpr (IS_IMAGE_UPLOAD) {
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
- texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span);
+ texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
+ buffer_operand.address, buffer_size);
}
return true;
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 8fc783cc0..2559a3aa7 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -781,8 +781,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
- const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
- : VideoCommon::ObtainBufferOperation::MarkAsWritten;
+ const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
@@ -793,7 +792,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
if constexpr (IS_IMAGE_UPLOAD) {
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
- texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span);
+ texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
+ buffer_operand.address, buffer_size);
}
return true;
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index e4d077e63..da3841bb3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1342,17 +1342,19 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
UploadMemory(map.buffer, map.offset, copies);
}
-void Image::DownloadMemory(std::span<VkBuffer> buffers_span, VkDeviceSize offset,
+void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
boost::container::small_vector<VkBuffer, 1> buffers_vector{};
- for (auto& buffer : buffers_span) {
- buffers_vector.push_back(buffer);
+ boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
+ for (size_t index = 0; index < buffers_span.size(); index++) {
+ buffers_vector.emplace_back(buffers_span[index]);
+ vk_copies.emplace_back(
+ TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
- std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
@@ -1377,9 +1379,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, VkDeviceSize offset
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
- for (auto buffer : buffers) {
- cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer,
- vk_copies);
+ for (size_t index = 0; index < buffers.size(); index++) {
+ cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
+ vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
@@ -1418,7 +1420,10 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
std::array buffers{
map.buffer,
};
- DownloadMemory(buffers, map.offset, copies);
+ std::array offsets{
+ map.offset,
+ };
+ DownloadMemory(buffers, offsets, copies);
}
bool Image::IsRescaled() const noexcept {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 422476188..bdaf43ba4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -138,7 +138,7 @@ public:
void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void DownloadMemory(std::span<VkBuffer> buffers, VkDeviceSize offset,
+ void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(const StagingBufferRef& map,
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 2cd5aa31e..63b8b5af5 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -661,27 +661,40 @@ template <class P>
void TextureCache<P>::CommitAsyncFlushes() {
// This is intentionally passing the value by copy
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- const std::span<const ImageId> download_ids = uncommitted_downloads;
+ auto& download_ids = uncommitted_downloads;
if (download_ids.empty()) {
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
- async_buffers.emplace_back(std::optional<AsyncBuffer>{});
+ async_buffers.emplace_back(std::move(uncommitted_async_buffers));
+ uncommitted_async_buffers.clear();
return;
}
size_t total_size_bytes = 0;
- for (const ImageId image_id : download_ids) {
- total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+ size_t last_async_buffer_id = uncommitted_async_buffers.size();
+ bool any_none_dma = false;
+ for (PendingDownload& download_info : download_ids) {
+ if (download_info.is_swizzle) {
+ total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
+ any_none_dma = true;
+ download_info.async_buffer_id = last_async_buffer_id;
+ }
}
- auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
- for (const ImageId image_id : download_ids) {
- Image& image = slot_images[image_id];
- const auto copies = FullDownloadCopies(image.info);
- image.DownloadMemory(download_map, copies);
- download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
+ if (any_none_dma) {
+ auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
+ for (const PendingDownload& download_info : download_ids) {
+ if (download_info.is_swizzle) {
+ Image& image = slot_images[download_info.object_id];
+ const auto copies = FullDownloadCopies(image.info);
+ image.DownloadMemory(download_map, copies);
+ download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
+ }
+ }
+ uncommitted_async_buffers.emplace_back(download_map);
}
- async_buffers.emplace_back(download_map);
}
committed_downloads.emplace_back(std::move(uncommitted_downloads));
+ async_buffers.emplace_back(std::move(uncommitted_async_buffers));
+ uncommitted_async_buffers.clear();
uncommitted_downloads.clear();
}
@@ -691,39 +704,57 @@ void TextureCache<P>::PopAsyncFlushes() {
return;
}
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- const std::span<const ImageId> download_ids = committed_downloads.front();
+ const auto& download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
async_buffers.pop_front();
return;
}
- auto download_map = *async_buffers.front();
- std::span<u8> download_span = download_map.mapped_span;
+ auto download_map = std::move(async_buffers.front());
for (size_t i = download_ids.size(); i > 0; i--) {
- const ImageBase& image = slot_images[download_ids[i - 1]];
- const auto copies = FullDownloadCopies(image.info);
- download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
- std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
- SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
- swizzle_data_buffer);
+ auto& download_info = download_ids[i - 1];
+ auto& download_buffer = download_map[download_info.async_buffer_id];
+ if (download_info.is_swizzle) {
+ const ImageBase& image = slot_images[download_info.object_id];
+ const auto copies = FullDownloadCopies(image.info);
+ download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
+ std::span<u8> download_span =
+ download_buffer.mapped_span.subspan(download_buffer.offset);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
+ swizzle_data_buffer);
+ } else {
+ const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
+ std::span<u8> download_span =
+ download_buffer.mapped_span.subspan(download_buffer.offset);
+ gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
+ buffer_info.size);
+ slot_buffer_downloads.erase(download_info.object_id);
+ }
+ }
+ for (auto& download_buffer : download_map) {
+ runtime.FreeDeferredStagingBuffer(download_buffer);
}
- runtime.FreeDeferredStagingBuffer(download_map);
committed_downloads.pop_front();
async_buffers.pop_front();
} else {
- const std::span<const ImageId> download_ids = committed_downloads.front();
+ const auto& download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
return;
}
size_t total_size_bytes = 0;
- for (const ImageId image_id : download_ids) {
- total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+ for (const PendingDownload& download_info : download_ids) {
+ if (download_info.is_swizzle) {
+ total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
+ }
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
- for (const ImageId image_id : download_ids) {
- Image& image = slot_images[image_id];
+ for (const PendingDownload& download_info : download_ids) {
+ if (download_info.is_swizzle) {
+ continue;
+ }
+ Image& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
@@ -732,8 +763,11 @@ void TextureCache<P>::PopAsyncFlushes() {
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
- for (const ImageId image_id : download_ids) {
- const ImageBase& image = slot_images[image_id];
+ for (const PendingDownload& download_info : download_ids) {
+ if (download_info.is_swizzle) {
+ continue;
+ }
+ const ImageBase& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
@@ -836,11 +870,27 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
template <class P>
void TextureCache<P>::DownloadImageIntoBuffer(
typename TextureCache<P>::Image* image, typename TextureCache<P>::BufferType buffer,
- size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies) {
- std::array buffers{
- buffer,
- };
- image->DownloadMemory(buffers, buffer_offset, copies);
+ size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies, GPUVAddr address, size_t size) {
+ if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+ auto slot = slot_buffer_downloads.insert(address, size);
+ uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot);
+ auto download_map = runtime.DownloadStagingBuffer(size, true);
+ uncommitted_async_buffers.emplace_back(download_map);
+ std::array buffers{
+ buffer,
+ download_map.buffer,
+ };
+ std::array buffer_offsets{
+ buffer_offset,
+ download_map.offset,
+ };
+ image->DownloadMemory(buffers, buffer_offsets, copies);
+ } else {
+ std::array buffers{
+ buffer,
+ };
+ image->DownloadMemory(buffers, buffer_offset, copies);
+ }
}
template <class P>
@@ -2219,7 +2269,7 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
if (new_id) {
const ImageViewBase& old_view = slot_image_views[new_id];
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
- uncommitted_downloads.push_back(old_view.image_id);
+ uncommitted_downloads.emplace_back(true, 0, old_view.image_id);
}
}
*old_id = new_id;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 51f44aed5..d5bba3379 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -217,7 +217,8 @@ public:
const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
- std::span<const VideoCommon::BufferImageCopy> copies);
+ std::span<const VideoCommon::BufferImageCopy> copies,
+ GPUVAddr address = 0, size_t size = 0);
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
@@ -428,17 +429,31 @@ private:
u64 critical_memory;
size_t critical_gc;
+ struct BufferDownload {
+ GPUVAddr address;
+ size_t size;
+ };
+
+ struct PendingDownload {
+ bool is_swizzle;
+ size_t async_buffer_id;
+ SlotId object_id;
+ };
+
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
+ SlotVector<BufferDownload> slot_buffer_downloads;
// TODO: This data structure is not optimal and it should be reworked
- std::vector<ImageId> uncommitted_downloads;
- std::deque<std::vector<ImageId>> committed_downloads;
- std::deque<std::optional<AsyncBuffer>> async_buffers;
+
+ std::vector<PendingDownload> uncommitted_downloads;
+ std::deque<std::vector<PendingDownload>> committed_downloads;
+ std::vector<AsyncBuffer> uncommitted_async_buffers;
+ std::deque<std::vector<AsyncBuffer>> async_buffers;
struct LRUItemParams {
using ObjectType = ImageId;