summary refs log tree commit diff stats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
author Ameer J <52414509+ameerj@users.noreply.github.com> 2023-11-27 03:08:53 +0100
committer GitHub <noreply@github.com> 2023-11-27 03:08:53 +0100
commit 1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a (patch)
tree c219aacab776c0a1e3956614b60a01fa2f6164cb /src/video_core/renderer_vulkan
parent shader_recompiler: Align SSBO offsets in GlobalMemory functions (diff)
parent Merge pull request #11535 from GPUCode/upload_cmdbuf (diff)
download yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.gz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.bz2
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.lz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.xz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.zst
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.zip
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.cpp 135
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp 36
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h 21
-rw-r--r-- src/video_core/renderer_vulkan/vk_fsr.cpp 24
-rw-r--r-- src/video_core/renderer_vulkan/vk_fsr.h 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_master_semaphore.cpp 29
-rw-r--r-- src/video_core/renderer_vulkan/vk_master_semaphore.h 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 24
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp 13
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.cpp 28
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.h 21
-rw-r--r-- src/video_core/renderer_vulkan/vk_smaa.cpp 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_staging_buffer_pool.h 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp 18
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h 1
18 files changed, 282 insertions, 130 deletions
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7e7a80740..c4c30d807 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -132,16 +132,12 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
+ RenderScreenshot(*framebuffer, use_accelerated);
- {
- std::scoped_lock lock{rasterizer.LockCaches()};
- RenderScreenshot(*framebuffer, use_accelerated);
-
- Frame* frame = present_manager.GetRenderFrame();
- blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
- scheduler.Flush(*frame->render_ready);
- present_manager.Present(frame);
- }
+ Frame* frame = present_manager.GetRenderFrame();
+ blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
+ scheduler.Flush(*frame->render_ready);
+ present_manager.Present(frame);
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 52fc142d1..66483a900 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -137,6 +137,56 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
BlitScreen::~BlitScreen() = default;
+/// Computes the region of the framebuffer to sample, in coordinates normalized by
+/// the dimensions of screen_info. The region is the framebuffer's crop rectangle,
+/// or the whole framebuffer when the crop rectangle is empty, with its edges swapped
+/// for horizontal and vertical flip transforms.
+static Common::Rectangle<f32> NormalizeCrop(const Tegra::FramebufferConfig& framebuffer,
+                                            const ScreenInfo& screen_info) {
+    using Service::android::BufferTransformFlags;
+
+    // Start from the crop rectangle when the framebuffer provides one; an empty
+    // crop rectangle means the full framebuffer extent is used instead.
+    const auto& crop = framebuffer.crop_rect;
+    const bool has_crop = !crop.IsEmpty();
+
+    f32 left = has_crop ? static_cast<f32>(crop.left) : 0.0f;
+    f32 top = has_crop ? static_cast<f32>(crop.top) : 0.0f;
+    f32 right = has_crop ? static_cast<f32>(crop.right)
+                         : static_cast<f32>(framebuffer.width);
+    f32 bottom = has_crop ? static_cast<f32>(crop.bottom)
+                          : static_cast<f32>(framebuffer.height);
+
+    // Flips are expressed by exchanging the two edges on the flipped axis.
+    auto pending_flags = framebuffer.transform_flags;
+
+    if (True(pending_flags & BufferTransformFlags::FlipH)) {
+        std::swap(left, right);
+    }
+    if (True(pending_flags & BufferTransformFlags::FlipV)) {
+        std::swap(top, bottom);
+    }
+
+    // Clear the flips handled above; anything still set is not supported.
+    pending_flags &= ~BufferTransformFlags::FlipH;
+    pending_flags &= ~BufferTransformFlags::FlipV;
+    if (True(pending_flags)) {
+        UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
+                          static_cast<u32>(pending_flags));
+    }
+
+    // Divide by the screen dimensions to move into normalized coordinates.
+    const f32 screen_width = static_cast<f32>(screen_info.width);
+    const f32 screen_height = static_cast<f32>(screen_info.height);
+
+    const f32 norm_left = left / screen_width;
+    const f32 norm_top = top / screen_height;
+    const f32 norm_right = right / screen_width;
+    const f32 norm_bottom = bottom / screen_height;
+
+    return Common::Rectangle<f32>(norm_left, norm_top, norm_right, norm_bottom);
+}
+
void BlitScreen::Recreate() {
present_manager.WaitPresent();
scheduler.Finish();
@@ -354,17 +404,10 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view);
}
if (fsr) {
- auto crop_rect = framebuffer.crop_rect;
- if (crop_rect.GetWidth() == 0) {
- crop_rect.right = framebuffer.width;
- }
- if (crop_rect.GetHeight() == 0) {
- crop_rect.bottom = framebuffer.height;
- }
- crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
- VkExtent2D fsr_input_size{
- .width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
- .height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
+ const auto crop_rect = NormalizeCrop(framebuffer, screen_info);
+ const VkExtent2D fsr_input_size{
+ .width = Settings::values.resolution_info.ScaleUp(screen_info.width),
+ .height = Settings::values.resolution_info.ScaleUp(screen_info.height),
};
VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
@@ -1397,61 +1440,37 @@ void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayou
void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const {
- const auto& framebuffer_transform_flags = framebuffer.transform_flags;
- const auto& framebuffer_crop_rect = framebuffer.crop_rect;
-
- static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f};
- auto left = texcoords.left;
- auto right = texcoords.right;
-
- switch (framebuffer_transform_flags) {
- case Service::android::BufferTransformFlags::Unset:
- break;
- case Service::android::BufferTransformFlags::FlipV:
- // Flip the framebuffer vertically
- left = texcoords.right;
- right = texcoords.left;
- break;
- default:
- UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
- static_cast<u32>(framebuffer_transform_flags));
- break;
- }
+ f32 left, top, right, bottom;
- UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
-
- f32 left_start{};
- if (framebuffer_crop_rect.Top() > 0) {
- left_start = static_cast<f32>(framebuffer_crop_rect.Top()) /
- static_cast<f32>(framebuffer_crop_rect.Bottom());
- }
- f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
- f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
- // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
- // (e.g. handheld mode) on a 1920x1080 framebuffer.
- if (!fsr) {
- if (framebuffer_crop_rect.GetWidth() > 0) {
- scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
- static_cast<f32>(screen_info.width);
- }
- if (framebuffer_crop_rect.GetHeight() > 0) {
- scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
- static_cast<f32>(screen_info.height);
- }
+ if (fsr) {
+ // FSR has already applied the crop, so we just want to render the image
+ // it has produced.
+ left = 0;
+ top = 0;
+ right = 1;
+ bottom = 1;
+ } else {
+ // Get the normalized crop rectangle.
+ const auto crop = NormalizeCrop(framebuffer, screen_info);
+
+ // Apply the crop.
+ left = crop.left;
+ top = crop.top;
+ right = crop.right;
+ bottom = crop.bottom;
}
+ // Map the coordinates to the screen.
const auto& screen = layout.screen;
const auto x = static_cast<f32>(screen.left);
const auto y = static_cast<f32>(screen.top);
const auto w = static_cast<f32>(screen.GetWidth());
const auto h = static_cast<f32>(screen.GetHeight());
- data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left_start + left * scale_v);
- data.vertices[1] =
- ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left_start + left * scale_v);
- data.vertices[2] =
- ScreenRectVertex(x, y + h, texcoords.top * scale_u, left_start + right * scale_v);
- data.vertices[3] =
- ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, left_start + right * scale_v);
+
+ data.vertices[0] = ScreenRectVertex(x, y, left, top);
+ data.vertices[1] = ScreenRectVertex(x + w, y, right, top);
+ data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom);
+ data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom);
}
void BlitScreen::CreateSMAA(VkExtent2D smaa_size) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 976c3f6a6..5958f52f7 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
- : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
- device{&runtime.device}, buffer{
- CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
+ device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
+ tracker{SizeBytes()} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
@@ -359,12 +359,31 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetStorageBufferAlignment());
}
+void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
+    // Forget the usage recorded for every buffer held in the slot vector.
+    auto buffer_it = slot_buffers.begin();
+    while (buffer_it != slot_buffers.end()) {
+        buffer_it->ResetUsageTracking();
+        buffer_it++;
+    }
+}
+
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
+bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
+                                          std::span<const VideoCommon::BufferCopy> copies) {
+    // Reordering is never allowed while the disable_buffer_reorder setting is on.
+    if (Settings::values.disable_buffer_reorder) {
+        return false;
+    }
+    // The upload may be reordered only when none of the destination regions is
+    // reported as used by the buffer. An empty list of copies yields true.
+    const auto touches_used_region = [&buffer](const VideoCommon::BufferCopy& copy) {
+        return buffer.IsRegionUsed(copy.dst_offset, copy.size);
+    };
+    return std::ranges::none_of(copies, touches_used_region);
+}
+
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
- std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+ bool can_reorder_upload) {
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
return;
}
@@ -380,9 +399,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
+
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
+ if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
+ scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
+ vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
+ upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+ });
+ return;
+ }
+
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
if (barrier) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 833dfac45..0b3fbd6d0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,6 +5,7 @@
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
+#include "video_core/buffer_cache/usage_tracker.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -34,6 +35,18 @@ public:
return *buffer;
}
+ [[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
+ return tracker.IsUsed(offset, size);
+ }
+
+ void MarkUsage(u64 offset, u64 size) noexcept {
+ tracker.Track(offset, size);
+ }
+
+ void ResetUsageTracking() noexcept {
+ tracker.Reset();
+ }
+
operator VkBuffer() const noexcept {
return *buffer;
}
@@ -49,6 +62,7 @@ private:
const Device* device{};
vk::Buffer buffer;
std::vector<BufferView> views;
+ VideoCommon::UsageTracker tracker;
};
class QuadArrayIndexBuffer;
@@ -67,6 +81,8 @@ public:
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool);
+ void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
+
void Finish();
u64 GetDeviceLocalMemory() const;
@@ -81,12 +97,15 @@ public:
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
+ bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
+
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void PreCopyBarrier();
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
- std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+ bool can_reorder_upload = false);
void PostCopyBarrier();
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index ce8f3f3c2..f7a05fbc0 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -34,7 +34,7 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
}
VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
- VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
+ VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) {
UpdateDescriptorSet(image_index, image_view);
@@ -61,15 +61,21 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
+ const f32 input_image_width = static_cast<f32>(input_image_extent.width);
+ const f32 input_image_height = static_cast<f32>(input_image_extent.height);
+ const f32 output_image_width = static_cast<f32>(output_size.width);
+ const f32 output_image_height = static_cast<f32>(output_size.height);
+ const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width;
+ const f32 viewport_x = crop_rect.left * input_image_width;
+ const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height;
+ const f32 viewport_y = crop_rect.top * input_image_height;
+
std::array<u32, 4 * 4> push_constants;
- FsrEasuConOffset(
- push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
- push_constants.data() + 12,
-
- static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
- static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
- static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
- static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
+ FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4,
+ push_constants.data() + 8, push_constants.data() + 12,
+
+ viewport_width, viewport_height, input_image_width, input_image_height,
+ output_image_width, output_image_height, viewport_x, viewport_y);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 8bb9fc23a..3505c1416 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -17,7 +17,7 @@ public:
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
VkExtent2D output_size);
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
- VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
+ VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect);
private:
void CreateDescriptorPool();
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 6b288b994..ac8b6e838 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
Refresh();
}
-VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
- VkSemaphore wait_semaphore, u64 host_tick) {
+VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+ VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+ u64 host_tick) {
if (semaphore) {
- return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+ return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
+ host_tick);
} else {
- return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+ return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
}
}
@@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
};
VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
+ vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) {
const VkSemaphore timeline_semaphore = *semaphore;
@@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
const std::array signal_values{host_tick, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+ const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
+
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const VkTimelineSemaphoreSubmitInfo timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
- .commandBufferCount = 1,
- .pCommandBuffers = cmdbuf.address(),
+ .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
+ .pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
@@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
return device.GetGraphicsQueue().Submit(submit_info);
}
-VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
- VkSemaphore wait_semaphore, u64 host_tick) {
+VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
+ vk::CommandBuffer& upload_cmdbuf,
+ VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+ u64 host_tick) {
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
+ const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
+
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
- .commandBufferCount = 1,
- .pCommandBuffers = cmdbuf.address(),
+ .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
+ .pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = &signal_semaphore,
};
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 3f599d7bd..7dfb93ffb 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -52,14 +52,16 @@ public:
void Wait(u64 tick);
/// Submits the device graphics queue, updating the tick as necessary
- VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
- VkSemaphore wait_semaphore, u64 host_tick);
+ VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+ VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
private:
- VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
- VkSemaphore wait_semaphore, u64 host_tick);
- VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
- VkSemaphore wait_semaphore, u64 host_tick);
+ VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+ VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+ u64 host_tick);
+ VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+ VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+ u64 host_tick);
void WaitThread(std::stop_token token);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 0d604eee3..2a13b2a72 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -263,6 +263,22 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.y_negate = key.state.y_negate != 0;
return info;
}
+
+/// Returns how many pipeline worker threads to create; the result is never zero.
+size_t GetTotalPipelineWorkers() {
+    // One less than the reported hardware thread count. The count is clamped to at
+    // least two first, so a report of zero or one still yields a single worker.
+    const size_t hardware_threads = static_cast<size_t>(std::thread::hardware_concurrency());
+    const size_t max_core_threads = std::max<size_t>(hardware_threads, 2ULL) - 1ULL;
+#ifdef ANDROID
+    // Leave at least a few cores free in android
+    constexpr size_t free_cores = 3ULL;
+    return max_core_threads > free_cores ? max_core_threads - free_cores : 1ULL;
+#else
+    return max_core_threads;
+#endif
+}
+
} // Anonymous namespace
size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -294,11 +310,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
texture_cache{texture_cache_}, shader_notify{shader_notify_},
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
-#ifdef ANDROID
- workers(1, "VkPipelineBuilder"),
-#else
- workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
-#endif
+ workers(device.HasBrokenParallelShaderCompiling() ? 1ULL : GetTotalPipelineWorkers(),
+ "VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
const VkDriverId driver_id{device.GetDriverID()};
@@ -338,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
.support_native_ndc = device.IsExtDepthClipControlSupported(),
.support_scaled_attributes = !device.MustEmulateScaledFormats(),
+ .support_multi_viewport = device.SupportsMultiViewport(),
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 66c03bf17..078777cdd 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -211,6 +211,13 @@ public:
return;
}
PauseCounter();
+ const auto driver_id = device.GetDriverID();
+ if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+ driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
+ pending_sync.clear();
+ sync_values_stash.clear();
+ return;
+ }
sync_values_stash.clear();
sync_values_stash.emplace_back();
std::vector<HostSyncValues>* sync_values = &sync_values_stash.back();
@@ -1378,6 +1385,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
return true;
}
+ auto driver_id = impl->device.GetDriverID();
+ if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+ driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
+ return true;
+ }
+
for (size_t i = 0; i < 2; i++) {
is_null[i] = !is_in_ac[i] && check_value(objects[i]->address);
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 059b7cb40..e0ab1eaac 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -82,7 +82,7 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
}
if (y_negate) {
- y += height;
+ y += conv(static_cast<f32>(regs.surface_clip.height));
height = -height;
}
@@ -199,7 +199,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
if (!pipeline) {
return;
}
- std::scoped_lock lock{LockCaches()};
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
// update engine as channel may be different.
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
@@ -621,7 +621,7 @@ void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
}
{
std::scoped_lock lock{buffer_cache.mutex};
- buffer_cache.CachedWriteMemory(addr, size);
+ buffer_cache.WriteMemory(addr, size);
}
pipeline_cache.InvalidateRegion(addr, size);
}
@@ -710,7 +710,6 @@ void RasterizerVulkan::TiledCacheBarrier() {
}
void RasterizerVulkan::FlushCommands() {
- std::scoped_lock lock{LockCaches()};
if (draw_counter == 0) {
return;
}
@@ -808,7 +807,6 @@ void RasterizerVulkan::FlushWork() {
if ((++draw_counter & 7) != 7) {
return;
}
- std::scoped_lock lock{LockCaches()};
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
@@ -923,9 +921,13 @@ void RasterizerVulkan::UpdateDynamicStates() {
}
void RasterizerVulkan::HandleTransformFeedback() {
+ static std::once_flag warn_unsupported;
+
const auto& regs = maxwell3d->regs;
if (!device.IsExtTransformFeedbackSupported()) {
- LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
+ std::call_once(warn_unsupported, [&] {
+ LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
+ });
return;
}
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
@@ -1503,7 +1505,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
CreateChannel(channel);
{
- std::scoped_lock lock{LockCaches()};
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CreateChannel(channel);
buffer_cache.CreateChannel(channel);
}
@@ -1516,7 +1518,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
const s32 channel_id = channel.bind_id;
BindToChannel(channel_id);
{
- std::scoped_lock lock{LockCaches()};
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.BindToChannel(channel_id);
buffer_cache.BindToChannel(channel_id);
}
@@ -1529,7 +1531,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
EraseChannel(channel_id);
{
- std::scoped_lock lock{LockCaches()};
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.EraseChannel(channel_id);
buffer_cache.EraseChannel(channel_id);
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ce3dfbaab..ad069556c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -133,10 +133,6 @@ public:
void ReleaseChannel(s32 channel_id) override;
- std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() {
- return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex};
- }
-
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 3be7837f4..146923db4 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -22,11 +22,12 @@ namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
-void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
+void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+ vk::CommandBuffer upload_cmdbuf) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
- command->Execute(cmdbuf);
+ command->Execute(cmdbuf, upload_cmdbuf);
command->~Command();
command = next;
}
@@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
// Perform the work, tracking whether the chunk was a submission
// before executing.
const bool has_submit = work->HasSubmit();
- work->ExecuteAll(current_cmdbuf);
+ work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
// If the chunk was a submission, reallocate the command buffer.
if (has_submit) {
@@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
+ current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
+ current_upload_cmdbuf.Begin({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .pNext = nullptr,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ .pInheritanceInfo = nullptr,
+ });
}
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
- Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+ RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
+ this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+ upload_cmdbuf.End();
cmdbuf.End();
if (on_submit) {
@@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
std::scoped_lock lock{submit_mutex};
switch (const VkResult result = master_semaphore->SubmitQueue(
- cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
+ cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index da03803aa..f8d8ca80a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -80,7 +80,8 @@ public:
/// Send work to a separate thread.
template <typename T>
- void Record(T&& command) {
+ requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
+ void RecordWithUploadBuffer(T&& command) {
if (chunk->Record(command)) {
return;
}
@@ -88,6 +89,15 @@ public:
(void)chunk->Record(command);
}
+    /// Records a command that only needs the main command buffer.
+    /// The callable is wrapped in an adapter that accepts (and ignores) the upload command buffer
+    /// and is then handed to RecordWithUploadBuffer.
+    /// @param c Callable invocable with a single vk::CommandBuffer.
+    template <typename T>
+        requires std::is_invocable_v<T, vk::CommandBuffer>
+    void Record(T&& c) {
+        // Forward instead of unconditionally moving: T&& is a forwarding reference, so when the
+        // caller passes an lvalue std::move would silently leave their callable moved-from.
+        // std::forward copies lvalues and moves rvalues.
+        this->RecordWithUploadBuffer(
+            [command = std::forward<T>(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
+                command(cmdbuf);
+            });
+    }
+
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
@@ -119,7 +129,7 @@ private:
public:
virtual ~Command() = default;
- virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
+ virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
Command* GetNext() const {
return next;
@@ -142,8 +152,8 @@ private:
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
- void Execute(vk::CommandBuffer cmdbuf) const override {
- command(cmdbuf);
+ void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
+ command(cmdbuf, upload_cmdbuf);
}
private:
@@ -152,7 +162,7 @@ private:
class CommandChunk final {
public:
- void ExecuteAll(vk::CommandBuffer cmdbuf);
+ void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
template <typename T>
bool Record(T& command) {
@@ -228,6 +238,7 @@ private:
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
+ vk::CommandBuffer current_upload_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
std::function<void()> on_submit;
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index 5efd7d66e..70644ea82 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
- scheduler.Record([&](vk::CommandBuffer& cmdbuf) {
+ scheduler.Record([&](vk::CommandBuffer cmdbuf) {
for (auto& images : m_dynamic_images) {
for (size_t i = 0; i < MaxDynamicImage; i++) {
ClearColorImage(cmdbuf, *images.images[i]);
@@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
UpdateDescriptorSets(source_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) {
+ scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d3deb9072..f63a20327 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -36,6 +36,10 @@ public:
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
void FreeDeferred(StagingBufferRef& ref);
+    /// Returns the VkBuffer obtained by dereferencing the pool's stream_buffer member.
+    [[nodiscard]] VkBuffer StreamBuf() const noexcept {
+        const VkBuffer stream_handle = *stream_buffer;
+        return stream_handle;
+    }
+
void TickFrame();
private:
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index de34f6d49..5dbec2e62 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1785,8 +1785,22 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
: VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
-ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
- : VideoCommon::ImageViewBase{params} {}
+/// Constructs the null image view.
+/// When the device reports nullDescriptor support nothing further is created. Otherwise a
+/// fallback image is made and one view of it is stored in every image_views slot.
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
+    : VideoCommon::ImageViewBase{params}, device{&runtime.device} {
+    if (!device->HasNullDescriptor()) {
+        // Handle fallback for devices without nullDescriptor
+        ImageInfo fallback_info{};
+        fallback_info.format = PixelFormat::A8B8G8R8_UNORM;
+
+        // Keep this order: the image is created first, then its raw handle is published,
+        // and only then are the views built.
+        null_image = MakeImage(*device, runtime.memory_allocator, fallback_info, {});
+        image_handle = *null_image;
+
+        const auto view_format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+        for (u32 type = 0; type < Shader::NUM_TEXTURE_TYPES; ++type) {
+            image_views[type] = MakeView(view_format, VK_IMAGE_ASPECT_COLOR_BIT);
+        }
+    }
+}
ImageView::~ImageView() = default;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 7a0807709..edf5d7635 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -267,6 +267,7 @@ private:
vk::ImageView depth_view;
vk::ImageView stencil_view;
vk::ImageView color_view;
+ vk::Image null_image;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;