From 6ac97405df021d5d2bd9a529253bd5c5a418c1a9 Mon Sep 17 00:00:00 2001 From: ameerj Date: Tue, 28 Jul 2020 00:08:02 -0400 Subject: Vk Async pipeline compilation --- src/video_core/renderer_vulkan/vk_device.cpp | 2 + src/video_core/renderer_vulkan/vk_device.h | 5 ++ .../renderer_vulkan/vk_fence_manager.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 5 +- .../renderer_vulkan/vk_graphics_pipeline.h | 6 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 24 +++++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 27 +++++++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 27 +++++++++- src/video_core/renderer_vulkan/vk_rasterizer.h | 10 ++++ src/video_core/renderer_vulkan/wrapper.cpp | 2 +- src/video_core/renderer_vulkan/wrapper.h | 2 +- src/video_core/shader/async_shaders.cpp | 59 ++++++++++++++++++++-- src/video_core/shader/async_shaders.h | 31 +++++++++++- 13 files changed, 182 insertions(+), 20 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 0c03e4d83..ebcfaa0e3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -382,6 +382,8 @@ bool VKDevice::Create() { graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); + + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); return true; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 529744f2d..30cd3e189 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -202,6 +202,10 @@ public: return reported_extensions; } + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + /// Checks if the physical device is suitable. 
static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); @@ -251,6 +255,7 @@ private: bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool use_asynchronous_shaders{}; // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index a02be5487..d7f65d435 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -29,7 +29,7 @@ void InnerFence::Queue() { } ASSERT(!event); - event = device.GetLogical().CreateEvent(); + event = device.GetLogical().CreateNewEvent(); ticks = scheduler.Ticks(); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index aaf930b90..7d51b9836 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -84,9 +84,8 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( program)}, - renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( - key.renderpass_params, - program)} {} + renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, + pipeline{CreatePipeline(key.renderpass_params, program)}, m_key{key} {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 
a1d699a6c..39c73a139 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -54,6 +54,10 @@ public: return renderpass; } + const GraphicsPipelineCacheKey& GetCacheKey() { + return m_key; + } + private: vk::DescriptorSetLayout CreateDescriptorSetLayout( vk::Span bindings) const; @@ -82,6 +86,8 @@ private: VkRenderPass renderpass; vk::Pipeline pipeline; + + const GraphicsPipelineCacheKey& m_key; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 418c62bc4..45d4dcb8c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -205,7 +205,8 @@ std::array VKPipelineCache::GetShaders() { return last_shaders = shaders; } -VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { +VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline( + const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { @@ -213,11 +214,27 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineC } last_graphics_key = key; + if (device.UseAsynchronousShaders()) { + auto work = async_shaders.GetCompletedWork(); + for (std::size_t i = 0; i < work.size(); ++i) { + auto& entry = graphics_cache.at(work[i].pipeline->GetCacheKey()); + entry = std::move(work[i].pipeline); + } + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); + if (is_cache_miss) { + LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); + const auto [program, bindings] = DecompileShaders(key.fixed_state); + async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params, + key.padding, key.shaders, key.fixed_state); + } + return *(last_graphics_pipeline = 
graphics_cache.at(key).get()); + } + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); auto& entry = pair->second; if (is_cache_miss) { LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key); + const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique(device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache, key, bindings, program); @@ -312,8 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { } std::pair> -VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { - const auto& fixed_state = key.fixed_state; +VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { auto& memory_manager = system.GPU().MemoryManager(); const auto& gpu = system.GPU().Maxwell3D(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0a3fe65fb..c70da6da4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -22,6 +22,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -152,16 +153,37 @@ public: std::array GetShaders(); - VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); + VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + const VKDevice& GetDevice() { + return device; + } + + VKScheduler& GetScheduler() { + return scheduler; + } + + VKDescriptorPool& GetDescriptorPool() { + return descriptor_pool; + } 
+ + VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() { + return update_descriptor_queue; + } + + VKRenderPassCache& GetRenderpassCache() { + return renderpass_cache; + } + protected: void OnShaderRemoval(Shader* shader) final; private: std::pair> DecompileShaders( - const GraphicsPipelineCacheKey& key); + const FixedPipelineState& fixed_state); Core::System& system; const VKDevice& device; @@ -177,6 +199,7 @@ private: GraphicsPipelineCacheKey last_graphics_key; VKGraphicsPipeline* last_graphics_pipeline = nullptr; + std::vector> duplicates; std::unordered_map> graphics_cache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7500e8244..6310e898c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -400,8 +400,25 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), sampler_cache(device), fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), - query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { + query_cache(system, *this, device, scheduler), + wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} { scheduler.SetQueryCache(query_cache); + if (device.UseAsynchronousShaders()) { + // Max worker threads we should allow + constexpr auto MAX_THREADS = 2u; + // Amount of threads we should reserve for other parts of yuzu + constexpr auto RESERVED_THREADS = 6u; + // Get the amount of threads we can use(this can return zero) + const auto cpu_thread_count = + std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); + // Deduce how many "extra" threads we have to use. 
+ const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1u, max_threads_unused); + // Don't use more than MAX_THREADS + const auto worker_count = std::min(max_worker_count, MAX_THREADS); + async_shaders.AllocateWorkers(worker_count); + } } RasterizerVulkan::~RasterizerVulkan() = default; @@ -439,7 +456,13 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { key.renderpass_params = GetRenderPassParams(texceptions); key.padding = 0; - auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); + auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) { + // Async graphics pipeline was not ready. + system.GPU().TickWork(); + return; + } + scheduler.BindGraphicsPipeline(pipeline.GetHandle()); const auto renderpass = pipeline.GetRenderPass(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 923178b0b..27604b9a3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -32,6 +32,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" namespace Core { class System; @@ -136,6 +137,14 @@ public: u32 pixel_stride) override; void SetupDirtyFlags() override; + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + /// Maximum supported size that a constbuffer can have in bytes. 
static constexpr std::size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, @@ -278,6 +287,7 @@ private: VKMemoryManager& memory_manager; StateTracker& state_tracker; VKScheduler& scheduler; + VideoCommon::Shader::AsyncShaders async_shaders; VKStagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 14cac38ea..c43d60adf 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons return ShaderModule(object, handle, *dld); } -Event Device::CreateEvent() const { +Event Device::CreateNewEvent() const { static constexpr VkEventCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 31885ef42..b9d3fedc1 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -721,7 +721,7 @@ public: ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; - Event CreateEvent() const; + Event CreateNewEvent() const; SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index b7f66d7ee..335a0d05b 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -113,15 +113,38 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, VAddr cpu_addr) { WorkerParams params{device.UseAssemblyShaders() ? 
AsyncShaders::Backend::GLASM : AsyncShaders::Backend::OpenGL, - device, + &device, shader_type, uid, std::move(code), std::move(code_b), main_offset, compiler_settings, - registry, + &registry, cpu_addr}; + + std::unique_lock lock(queue_mutex); + pending_queue.push_back(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::QueueVulkanShader( + Vulkan::VKPipelineCache* pp_cache, std::vector bindings, + Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params, u32 padding, + std::array shaders, + Vulkan::FixedPipelineState fixed_state) { + + WorkerParams params{ + .backend = AsyncShaders::Backend::Vulkan, + .pp_cache = pp_cache, + .bindings = bindings, + .program = program, + .renderpass_params = renderpass_params, + .padding = padding, + .shaders = shaders, + .fixed_state = fixed_state, + }; + std::unique_lock lock(queue_mutex); pending_queue.push_back(std::move(params)); cv.notify_one(); @@ -140,6 +163,7 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context if (!HasWorkQueued()) { continue; } + // Another thread beat us, just unlock and wait for the next load if (pending_queue.empty()) { continue; @@ -152,10 +176,11 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context if (work.backend == AsyncShaders::Backend::OpenGL || work.backend == AsyncShaders::Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + VideoCommon::Shader::Registry registry = *work.registry; + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry); const auto scope = context->Acquire(); auto program = - OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry); Result result{}; result.backend = work.backend; result.cpu_address = work.cpu_address; @@ -174,6 +199,32 @@ void
AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context std::unique_lock complete_lock(completed_mutex); finished_work.push_back(std::move(result)); } + + } else if (work.backend == AsyncShaders::Backend::Vulkan) { + Vulkan::GraphicsPipelineCacheKey params_key{ + work.renderpass_params, + work.padding, + work.shaders, + work.fixed_state, + }; + { + std::unique_lock complete_lock(completed_mutex); + + // Duplicate creation of pipelines leads to instability and crashing, caused by a + // race condition but band-aid solution is locking the making of the pipeline + // results in only one pipeline created at a time. + Result result{ + .backend = work.backend, + .pipeline = std::make_unique( + work.pp_cache->GetDevice(), work.pp_cache->GetScheduler(), + work.pp_cache->GetDescriptorPool(), + work.pp_cache->GetUpdateDescriptorQueue(), + work.pp_cache->GetRenderpassCache(), params_key, work.bindings, + work.program), + }; + + finished_work.push_back(std::move(result)); + } } } } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 2f5ee94ad..702026ce2 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -14,6 +14,10 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" namespace Core::Frontend { class EmuWindow; @@ -24,6 +28,10 @@ namespace Tegra { class GPU; } +namespace Vulkan { +class VKPipelineCache; +} + namespace VideoCommon::Shader { class AsyncShaders { @@ -31,6 +39,7 @@ public: enum class Backend { OpenGL, GLASM, + Vulkan, }; struct ResultPrograms { @@ -46,6 +55,7 @@ public: std::vector code; std::vector code_b; 
Tegra::Engines::ShaderType shader_type; + std::unique_ptr pipeline; }; explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); @@ -76,6 +86,13 @@ public: VideoCommon::Shader::CompilerSettings compiler_settings, const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, + std::vector bindings, + Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params, + u32 padding, + std::array shaders, + Vulkan::FixedPipelineState fixed_state); + private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -84,15 +101,25 @@ private: struct WorkerParams { AsyncShaders::Backend backend; - OpenGL::Device device; + // For OGL + const OpenGL::Device* device; Tegra::Engines::ShaderType shader_type; u64 uid; std::vector code; std::vector code_b; u32 main_offset; VideoCommon::Shader::CompilerSettings compiler_settings; - VideoCommon::Shader::Registry registry; + const VideoCommon::Shader::Registry* registry; VAddr cpu_address; + + // For Vulkan + Vulkan::VKPipelineCache* pp_cache; + std::vector bindings; + Vulkan::SPIRVProgram program; + Vulkan::RenderPassParams renderpass_params; + u32 padding; + std::array shaders; + Vulkan::FixedPipelineState fixed_state; }; std::condition_variable cv; -- cgit v1.2.3