diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/async_shaders.cpp | 170 | ||||
-rw-r--r-- | src/video_core/shader/async_shaders.h | 107 |
2 files changed, 277 insertions, 0 deletions
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..fb94ac2e7 --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp @@ -0,0 +1,170 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/shader/async_shaders.h" + +namespace VideoCommon::Shader { +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} +AsyncShaders::~AsyncShaders() { + KillWorkers(); +} + +void AsyncShaders::AllocateWorkers(std::size_t num_workers) { + // If we're already have workers queued or don't want to queue workers, ignore + if (num_workers == worker_threads.size() || num_workers == 0) { + return; + } + + // If workers already exist, clear them + if (!worker_threads.empty()) { + FreeWorkers(); + } + + // Create workers + for (std::size_t i = 0; i < num_workers; i++) { + context_list.push_back(emu_window.CreateSharedContext()); + worker_threads.push_back(std::move( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); + } +} + +void AsyncShaders::FreeWorkers() { + // Mark all threads to quit + is_thread_exiting.store(true); + for (auto& thread : worker_threads) { + thread.join(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +void AsyncShaders::KillWorkers() { + is_thread_exiting.store(true); + for (auto& thread : worker_threads) { + thread.detach(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +bool AsyncShaders::HasWorkQueued() { + std::shared_lock lock(queue_mutex); + return !pending_queue.empty(); +} + +bool AsyncShaders::HasCompletedWork() { + std::shared_lock lock(completed_mutex); + return !finished_work.empty(); +} + +bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { + const auto& regs = gpu.Maxwell3D().regs; + + // If something is using depth, we can assume that games are not rendering anything which will + // be used one time. + if (regs.zeta_enable) { + return true; + } + + // If games are using a small index count, we can assume these are full screen quads. Usually + // these shaders are only used once for building textures so we can assume they can't be built + // async + if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { + return false; + } + + return true; +} + +std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { + std::vector<AsyncShaders::Result> results; + { + std::unique_lock lock(completed_mutex); + results.assign(std::make_move_iterator(finished_work.begin()), + std::make_move_iterator(finished_work.end())); + finished_work.clear(); + } + return results; +} + +void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, + Tegra::Engines::ShaderType shader_type, u64 uid, + std::vector<u64> code, std::vector<u64> code_b, + u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, + VAddr cpu_addr) { + WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM + : AsyncShaders::Backend::OpenGL, + device, + shader_type, + uid, + std::move(code), + std::move(code_b), + main_offset, + compiler_settings, + registry, + cpu_addr}; + std::unique_lock lock(queue_mutex); + pending_queue.push_back(std::move(params)); +} + +void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { + using namespace std::chrono_literals; + while (!is_thread_exiting.load(std::memory_order_relaxed)) { + // Partial lock to allow all threads to read at the same time + if (!HasWorkQueued()) { + continue; + } + // Complete lock for pulling workload + queue_mutex.lock(); + // Another thread beat us, just unlock and wait for the next load + if (pending_queue.empty()) { + queue_mutex.unlock(); + continue; + } + // Pull work from queue + WorkerParams work = std::move(pending_queue.front()); + pending_queue.pop_front(); + queue_mutex.unlock(); + + if (work.backend == AsyncShaders::Backend::OpenGL || + work.backend == AsyncShaders::Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + const auto scope = context->Acquire(); + auto program = + OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + Result result{}; + result.backend = work.backend; + result.cpu_address = work.cpu_address; + result.uid = work.uid; + result.code = std::move(work.code); + result.code_b = std::move(work.code_b); + result.shader_type = work.shader_type; + + if (work.backend == AsyncShaders::Backend::OpenGL) { + result.program.opengl = std::move(program->source_program); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + result.program.glasm = std::move(program->assembly_program); + } + + { + std::unique_lock complete_lock(completed_mutex); + finished_work.push_back(std::move(result)); + } + } + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..26bc38326 --- /dev/null +++ b/src/video_core/shader/async_shaders.h @@ -0,0 +1,107 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <deque> +#include <memory> +#include <shared_mutex> +#include <thread> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" + +namespace Core::Frontend { +class EmuWindow; +class GraphicsContext; +} // namespace Core::Frontend + +namespace Tegra { +class GPU; +} + +namespace VideoCommon::Shader { + +class AsyncShaders { +public: + enum class Backend { + OpenGL, + GLASM, + }; + + struct ResultPrograms { + OpenGL::OGLProgram opengl; + OpenGL::OGLAssemblyProgram glasm; + }; + + struct Result { + u64 uid; + VAddr cpu_address; + Backend backend; + ResultPrograms program; + std::vector<u64> code; + std::vector<u64> code_b; + Tegra::Engines::ShaderType shader_type; + }; + + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + ~AsyncShaders(); + + /// Start up shader worker threads + void AllocateWorkers(std::size_t num_workers); + + /// Clear the shader queue and kill all worker threads + void FreeWorkers(); + + // Force end all threads + void KillWorkers(); + + /// Check our worker queue to see if we have any work queued already + bool HasWorkQueued(); + + /// Check to see if any shaders have actually been compiled + bool HasCompletedWork(); + + /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build + /// every shader async as some shaders are only built and executed once. We try to "guess" which + /// shader would be used only once + bool IsShaderAsync(const Tegra::GPU& gpu) const; + + /// Pulls completed compiled shaders + std::vector<Result> GetCompletedWork(); + + void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, + u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + +private: + void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); + + struct WorkerParams { + AsyncShaders::Backend backend; + OpenGL::Device device; + Tegra::Engines::ShaderType shader_type; + u64 uid; + std::vector<u64> code; + std::vector<u64> code_b; + u32 main_offset; + VideoCommon::Shader::CompilerSettings compiler_settings; + VideoCommon::Shader::Registry registry; + VAddr cpu_address; + }; + + std::shared_mutex queue_mutex; + std::shared_mutex completed_mutex; + std::atomic<bool> is_thread_exiting{}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; + std::vector<std::thread> worker_threads; + std::deque<WorkerParams> pending_queue; + std::vector<AsyncShaders::Result> finished_work; + Core::Frontend::EmuWindow& emu_window; +}; + +} // namespace VideoCommon::Shader |