summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp')
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp155
1 files changed, 151 insertions, 4 deletions
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..dfd8c8e5a 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
#include <fmt/format.h>
+#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
@@ -17,18 +18,119 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+namespace {
+// Maximum potential alignment of a Vulkan buffer
+constexpr VkDeviceSize MAX_ALIGNMENT = 256;
+// Maximum size to put elements in the stream buffer
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+// Stream buffer size in bytes
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
+
+constexpr VkMemoryPropertyFlags HOST_FLAGS =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
+
+bool IsStreamHeap(VkMemoryHeap heap) noexcept {
+ return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
+}
+
+std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
+ VkMemoryPropertyFlags flags) noexcept {
+ for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+ if (((type_mask >> type_index) & 1) == 0) {
+ // Memory type is incompatible
+ continue;
+ }
+ const VkMemoryType& memory_type = props.memoryTypes[type_index];
+ if ((memory_type.propertyFlags & flags) != flags) {
+ // Memory type doesn't have the flags we want
+ continue;
+ }
+ if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
+ // Memory heap is not suitable for streaming
+ continue;
+ }
+ // Success!
+ return type_index;
+ }
+ return std::nullopt;
+}
+
+u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
+ // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
+ std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
+ if (type) {
+ return *type;
+ }
+ // Otherwise try without the DEVICE_LOCAL_BIT
+ type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
+ if (type) {
+ return *type;
+ }
+ // This should never happen, and in case it does, signal it as an out of memory situation
+ throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+}
+
+size_t Region(size_t iterator) noexcept {
+ return iterator / REGION_SIZE;
+}
+} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_)
- : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {}
+ : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+ const vk::Device& dev = device.GetLogical();
+ stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = STREAM_BUFFER_SIZE,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ stream_buffer.SetObjectNameEXT("Stream Buffer");
+ }
+ VkMemoryDedicatedRequirements dedicated_reqs{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ .pNext = nullptr,
+ .prefersDedicatedAllocation = VK_FALSE,
+ .requiresDedicatedAllocation = VK_FALSE,
+ };
+ const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
+ const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
+ dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
+ const VkMemoryDedicatedAllocateInfo dedicated_info{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .image = nullptr,
+ .buffer = *stream_buffer,
+ };
+ const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+ stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = make_dedicated ? &dedicated_info : nullptr,
+ .allocationSize = requirements.size,
+ .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
+ });
+ if (device.HasDebuggingToolAttached()) {
+ stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+ }
+ stream_buffer.BindMemory(*stream_memory, 0);
+ stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+}
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
- if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
- return *ref;
+ if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+ return GetStreamBuffer(size);
}
- return CreateStagingBuffer(size, usage);
+ return GetStagingBuffer(size, usage);
}
void StagingBufferPool::TickFrame() {
@@ -39,6 +141,51 @@ void StagingBufferPool::TickFrame() {
ReleaseCache(MemoryUsage::Download);
}
+StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
+ if (AreRegionsActive(Region(free_iterator) + 1,
+ std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
+ // Avoid waiting for the previous usages to be free
+ return GetStagingBuffer(size, MemoryUsage::Upload);
+ }
+ const u64 current_tick = scheduler.CurrentTick();
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
+ current_tick);
+ used_iterator = iterator;
+ free_iterator = std::max(free_iterator, iterator + size);
+
+ if (iterator + size > STREAM_BUFFER_SIZE) {
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
+ current_tick);
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ if (AreRegionsActive(0, Region(size) + 1)) {
+ // Avoid waiting for the previous usages to be free
+ return GetStagingBuffer(size, MemoryUsage::Upload);
+ }
+ }
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return StagingBufferRef{
+ .buffer = *stream_buffer,
+ .offset = static_cast<VkDeviceSize>(offset),
+ .mapped_span = std::span<u8>(stream_pointer + offset, size),
+ };
+}
+
+bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
+ return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
+ [this](u64 sync_tick) { return !scheduler.IsFree(sync_tick); });
+};
+
+StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
+ if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
+ return *ref;
+ }
+ return CreateStagingBuffer(size, usage);
+}
+
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
MemoryUsage usage) {
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];