From bd8b9bbcee93549f323352f227ff44d0e79e0ad4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Feb 2020 16:13:47 -0300 Subject: gl_shader_cache: Rework shader cache and remove post-specializations Instead of pre-specializing shaders and then post-specializing them, drop the later and only "specialize" the shader while decoding it. --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 503 +++++++-------------- 1 file changed, 158 insertions(+), 345 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_shader_cache.cpp') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4cb89db8c..e3a1d5a5f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -2,12 +2,16 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include +#include #include #include #include #include #include + #include + #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" @@ -56,7 +60,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { } /// Calculates the size of a program stream -std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { +std::size_t CalculateProgramSize(const ProgramCode& program) { constexpr std::size_t start_offset = 10; // This is the encoded version of BRA that jumps to itself. All Nvidia // shaders end with one. @@ -109,32 +113,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { } } -/// Describes primitive behavior on geometry shaders -constexpr std::pair GetPrimitiveDescription(GLenum primitive_mode) { - switch (primitive_mode) { - case GL_POINTS: - return {"points", 1}; - case GL_LINES: - case GL_LINE_STRIP: - return {"lines", 2}; - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: - return {"lines_adjacency", 4}; - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_TRIANGLE_FAN: - return {"triangles", 3}; - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: - return {"triangles_adjacency", 6}; - default: - return {"points", 1}; - } -} - /// Hashes one (or two) program streams u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { + const ProgramCode& code_b = {}) { u64 unique_identifier = boost::hash_value(code); if (is_a) { // VertexA programs include two programs @@ -143,24 +124,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co return unique_identifier; } -/// Creates an unspecialized program from code streams -std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, - const std::optional& ir_b) { - switch (shader_type) { - case ShaderType::Vertex: - return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); - case ShaderType::Geometry: - return GLShader::GenerateGeometryShader(device, ir); - case ShaderType::Fragment: - return GLShader::GenerateFragmentShader(device, ir); - case ShaderType::Compute: - return GLShader::GenerateComputeShader(device, ir); - default: - UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast(shader_type)); - return {}; - } -} - constexpr const char* GetShaderTypeName(ShaderType shader_type) { switch (shader_type) { case ShaderType::Vertex: @@ -196,102 +159,35 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { return {}; } -std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { +std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); } -Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, - ShaderType shader_type) { - if (shader_type == ShaderType::Compute) { - return system.GPU().KeplerCompute(); - } else { - return system.GPU().Maxwell3D(); - } -} - -std::unique_ptr MakeLocker(Core::System& system, ShaderType shader_type) { - return std::make_unique(shader_type, - GetConstBufferEngineInterface(system, shader_type)); -} - -void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { - locker.SetBoundBuffer(usage.bound_buffer); - for (const auto& key : usage.keys) { - const auto [buffer, offset] = key.first; - locker.InsertKey(buffer, offset, key.second); +std::shared_ptr MakeLocker(const ShaderDiskCacheEntry& entry) { + const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; + auto locker = std::make_shared(entry.type, guest_profile); + locker->SetBoundBuffer(entry.bound_buffer); + for (const auto& [address, value] : entry.keys) { + const auto [buffer, offset] = address; + locker->InsertKey(buffer, offset, value); } - for (const auto& [offset, sampler] : usage.bound_samplers) { - locker.InsertBoundSampler(offset, sampler); + for (const auto& [offset, sampler] : entry.bound_samplers) { + locker->InsertBoundSampler(offset, sampler); } - for (const auto& [key, sampler] : usage.bindless_samplers) { + for (const auto& [key, sampler] : entry.bindless_samplers) { const auto [buffer, offset] = key; - locker.InsertBindlessSampler(buffer, offset, sampler); + locker->InsertBindlessSampler(buffer, offset, sampler); } + return locker; } -CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, - const ProgramCode& code, const ProgramCode& code_b, - ConstBufferLocker& locker, const ProgramVariant& variant, - bool hint_retrievable = false) { - LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); - - const bool is_compute = shader_type == ShaderType::Compute; - const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); - std::optional ir_b; - if (!code_b.empty()) { - ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); - } - - std::string source = fmt::format(R"(// {} -#version 430 core -#extension GL_ARB_separate_shader_objects : enable -)", - GetShaderId(unique_identifier, shader_type)); - if (device.HasShaderBallot()) { - source += "#extension GL_ARB_shader_ballot : require\n"; - } - if (device.HasVertexViewportLayer()) { - source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; - } - if (device.HasImageLoadFormatted()) { - source += "#extension GL_EXT_shader_image_load_formatted : require\n"; - } - if (device.HasWarpIntrinsics()) { - source += "#extension GL_NV_gpu_shader5 : require\n" - "#extension GL_NV_shader_thread_group : require\n" - "#extension GL_NV_shader_thread_shuffle : require\n"; - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations) - // on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - source += "#pragma optionNV(fastmath off)\n"; - - if (shader_type == ShaderType::Geometry) { - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); - source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); - source += fmt::format("layout ({}) in;\n", glsl_topology); - } - if (shader_type == ShaderType::Compute) { - if (variant.local_memory_size > 0) { - source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", - Common::AlignUp(variant.local_memory_size, 4) / 4); - } - source += - fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", - variant.block_x, variant.block_y, variant.block_z); - - if (variant.shared_memory_size > 0) { - // shared_memory_size is described in number of words - source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); - } - } - - source += '\n'; - source += GenerateGLSL(device, shader_type, ir, ir_b); - +std::shared_ptr BuildShader(const Device& device, ShaderType shader_type, + u64 unique_identifier, const ShaderIR& ir, + bool hint_retrievable = false) { + LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); + const std::string glsl = DecompileShader(device, ir, shader_type); OGLShader shader; - shader.Create(source.c_str(), GetGLShaderType(shader_type)); + shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); auto program = std::make_shared(); program->Create(true, hint_retrievable, shader.handle); @@ -299,7 +195,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp } std::unordered_set GetSupportedFormats() { - GLint num_formats{}; + GLint num_formats; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); std::vector formats(num_formats); @@ -314,115 +210,81 @@ std::unordered_set GetSupportedFormats() { } // Anonymous namespace -CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, - GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) - : RasterizerCacheObject{params.host_ptr}, system{params.system}, - disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, - unique_identifier{params.unique_identifier}, shader_type{shader_type}, - entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { - if (!params.precompiled_variants) { - return; - } - for (const auto& pair : *params.precompiled_variants) { - auto locker = MakeLocker(system, shader_type); - const auto& usage = pair->first; - FillLocker(*locker, usage); - - std::unique_ptr* locker_variant = nullptr; - const auto it = - std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { - return variant->locker->HasEqualKeys(*locker); - }); - if (it == locker_variants.end()) { - locker_variant = &locker_variants.emplace_back(); - *locker_variant = std::make_unique(); - locker_variant->get()->locker = std::move(locker); - } else { - locker_variant = &*it; - } - locker_variant->get()->programs.emplace(usage.variant, pair->second); +CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, + std::shared_ptr locker, + ShaderEntries entries, std::shared_ptr program) + : RasterizerCacheObject{host_ptr}, locker{std::move(locker)}, entries{std::move(entries)}, + cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} + +CachedShader::~CachedShader() = default; + +GLuint CachedShader::GetHandle() const { + if (!locker->IsConsistent()) { + std::abort(); } + return program->handle; } Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, ProgramCode code_b) { const auto shader_type = GetShaderType(program_type); - params.disk_cache.SaveRaw( - ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); + const std::size_t size_in_bytes = code.size() * sizeof(u64); - ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); + auto locker = std::make_shared(shader_type, params.system.GPU().Maxwell3D()); + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *locker); // TODO(Rodrigo): Handle VertexA shaders // std::optional ir_b; // if (!code_b.empty()) { // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); // } - return std::shared_ptr(new CachedShader( - params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); + auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir); + + ShaderDiskCacheEntry entry; + entry.type = shader_type; + entry.code = std::move(code); + entry.code_b = std::move(code_b); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = locker->GetBoundBuffer(); + entry.keys = locker->GetKeys(); + entry.bound_samplers = locker->GetBoundSamplers(); + entry.bindless_samplers = locker->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + return std::shared_ptr(new CachedShader(params.host_ptr, params.cpu_addr, + size_in_bytes, std::move(locker), + MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { - params.disk_cache.SaveRaw( - ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); - - ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, - params.system.GPU().KeplerCompute()); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); - return std::shared_ptr(new CachedShader( - params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + + auto locker = std::make_shared(Tegra::Engines::ShaderType::Compute, + params.system.GPU().KeplerCompute()); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *locker); + auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); + + ShaderDiskCacheEntry entry; + entry.type = ShaderType::Compute; + entry.code = std::move(code); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = locker->GetBoundBuffer(); + entry.keys = locker->GetKeys(); + entry.bound_samplers = locker->GetBoundSamplers(); + entry.bindless_samplers = locker->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + return std::shared_ptr(new CachedShader(params.host_ptr, params.cpu_addr, + size_in_bytes, std::move(locker), + MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, - const UnspecializedShader& unspecialized) { - return std::shared_ptr(new CachedShader(params, unspecialized.type, - unspecialized.entries, unspecialized.code, - unspecialized.code_b)); -} - -GLuint CachedShader::GetHandle(const ProgramVariant& variant) { - EnsureValidLockerVariant(); - - const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); - auto& program = entry->second; - if (!is_cache_miss) { - return program->handle; - } - - program = BuildShader(device, unique_identifier, shader_type, code, code_b, - *curr_locker_variant->locker, variant); - disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); - - LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); - return program->handle; -} - -bool CachedShader::EnsureValidLockerVariant() { - const auto previous_variant = curr_locker_variant; - if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { - curr_locker_variant = nullptr; - } - if (!curr_locker_variant) { - for (auto& variant : locker_variants) { - if (variant->locker->IsConsistent()) { - curr_locker_variant = variant.get(); - } - } - } - if (!curr_locker_variant) { - auto& new_variant = locker_variants.emplace_back(); - new_variant = std::make_unique(); - new_variant->locker = MakeLocker(system, shader_type); - curr_locker_variant = new_variant.get(); - } - return previous_variant == curr_locker_variant; -} - -ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, - const ConstBufferLocker& locker) const { - return ShaderDiskCacheUsage{unique_identifier, variant, - locker.GetBoundBuffer(), locker.GetKeys(), - locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; + const PrecompiledShader& precompiled_shader, + std::size_t size_in_bytes) { + return std::shared_ptr( + new CachedShader(params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.locker, + precompiled_shader.entries, precompiled_shader.program)); } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -432,16 +294,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { - const auto transferable = disk_cache.LoadTransferable(); + const std::optional transferable = disk_cache.LoadTransferable(); if (!transferable) { return; } - const auto [raws, shader_usages] = *transferable; - if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { - return; - } - const auto dumps = disk_cache.LoadPrecompiled(); + const std::vector gl_cache = disk_cache.LoadPrecompiled(); const auto supported_formats = GetSupportedFormats(); // Track if precompiled cache was altered during loading to know if we have to @@ -450,77 +308,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, // Inform the frontend about shader build initialization if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); + callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); } std::mutex mutex; std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool compilation_failed = false; + std::atomic_bool gl_cache_failed = false; + + const auto find_precompiled = [&gl_cache](u64 id) { + return std::find_if(gl_cache.begin(), gl_cache.end(), + [id](const auto& entry) { return entry.unique_identifier == id; }); + }; - const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end, const std::vector& shader_usages, - const ShaderDumpsMap& dumps) { + const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, + std::size_t end) { context->MakeCurrent(); SCOPE_EXIT({ return context->DoneCurrent(); }); for (std::size_t i = begin; i < end; ++i) { - if (stop_loading || compilation_failed) { + if (stop_loading) { return; } - const auto& usage{shader_usages[i]}; - const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; - const auto dump{dumps.find(usage)}; - - CachedProgram shader; - if (dump != dumps.end()) { - // If the shader is dumped, attempt to load it with - shader = GeneratePrecompiledProgram(dump->second, supported_formats); - if (!shader) { - compilation_failed = true; - return; + const auto& entry = (*transferable)[i]; + const u64 unique_identifier = entry.unique_identifier; + const auto it = find_precompiled(unique_identifier); + const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; + + const bool is_compute = entry.type == ShaderType::Compute; + const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + auto locker = MakeLocker(entry); + const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *locker); + + std::shared_ptr program; + if (precompiled_entry) { + // If the shader is precompiled, attempt to load it with + program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); + if (!program) { + gl_cache_failed = true; } } - if (!shader) { - auto locker{MakeLocker(system, unspecialized.type)}; - FillLocker(*locker, usage); - - shader = BuildShader(device, usage.unique_identifier, unspecialized.type, - unspecialized.code, unspecialized.code_b, *locker, - usage.variant, true); + if (!program) { + // Otherwise compile it from GLSL + program = BuildShader(device, entry.type, unique_identifier, ir, true); } + PrecompiledShader shader; + shader.program = std::move(program); + shader.locker = std::move(locker); + shader.entries = MakeEntries(ir); + std::scoped_lock lock{mutex}; if (callback) { callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - shader_usages.size()); + transferable->size()); } - - precompiled_programs.emplace(usage, std::move(shader)); - - // TODO(Rodrigo): Is there a better way to do this? - precompiled_variants[usage.unique_identifier].push_back( - precompiled_programs.find(usage)); + runtime_cache.emplace(entry.unique_identifier, std::move(shader)); } }; const auto num_workers{static_cast(std::thread::hardware_concurrency() + 1ULL)}; - const std::size_t bucket_size{shader_usages.size() / num_workers}; + const std::size_t bucket_size{transferable->size() / num_workers}; std::vector> contexts(num_workers); std::vector threads(num_workers); for (std::size_t i = 0; i < num_workers; ++i) { const bool is_last_worker = i + 1 == num_workers; const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; + const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; // On some platforms the shared context has to be created from the GUI thread contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); + threads[i] = std::thread(worker, contexts[i].get(), start, end); } for (auto& thread : threads) { thread.join(); } - if (compilation_failed) { + if (gl_cache_failed) { // Invalidate the precompiled cache if a shader dumped shader was rejected disk_cache.InvalidatePrecompiled(); precompiled_cache_altered = true; @@ -533,11 +396,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw // before precompiling them - for (std::size_t i = 0; i < shader_usages.size(); ++i) { - const auto& usage{shader_usages[i]}; - if (dumps.find(usage) == dumps.end()) { - const auto& program{precompiled_programs.at(usage)}; - disk_cache.SaveDump(usage, program->handle); + for (std::size_t i = 0; i < transferable->size(); ++i) { + const u64 id = (*transferable)[i].unique_identifier; + const auto it = find_precompiled(id); + if (it == gl_cache.end()) { + const GLuint program = runtime_cache.at(id).program->handle; + disk_cache.SavePrecompiled(id, program); precompiled_cache_altered = true; } } @@ -547,80 +411,29 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } -const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { - const auto it = precompiled_variants.find(unique_identifier); - return it == precompiled_variants.end() ? nullptr : &it->second; -} - -CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheDump& dump, const std::unordered_set& supported_formats) { - if (supported_formats.find(dump.binary_format) == supported_formats.end()) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); +std::shared_ptr ShaderCacheOpenGL::GeneratePrecompiledProgram( + const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, + const std::unordered_set& supported_formats) { + if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { + LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); return {}; } - CachedProgram shader = std::make_shared(); - shader->handle = glCreateProgram(); - glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), - static_cast(dump.binary.size())); - - GLint link_status{}; - glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); + auto program = std::make_shared(); + program->handle = glCreateProgram(); + glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glProgramBinary(program->handle, precompiled_entry.binary_format, + precompiled_entry.binary.data(), + static_cast(precompiled_entry.binary.size())); + + GLint link_status; + glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); + LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); return {}; } - return shader; -} - -bool ShaderCacheOpenGL::GenerateUnspecializedShaders( - const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector& raws) { - if (callback) { - callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); - } - - for (std::size_t i = 0; i < raws.size(); ++i) { - if (stop_loading) { - return false; - } - const auto& raw{raws[i]}; - const u64 unique_identifier{raw.GetUniqueIdentifier()}; - const u64 calculated_hash{ - GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; - if (unique_identifier != calculated_hash) { - LOG_ERROR(Render_OpenGL, - "Invalid hash in entry={:016x} (obtained hash={:016x}) - " - "removing shader cache", - raw.GetUniqueIdentifier(), calculated_hash); - disk_cache.InvalidateTransferable(); - return false; - } - - const u32 main_offset = - raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - ConstBufferLocker locker(raw.GetType()); - const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional ir_b; - // if (raw.HasProgramA()) { - // ir_b.emplace(raw.GetProgramCodeB(), main_offset); - // } - - UnspecializedShader unspecialized; - unspecialized.entries = GLShader::GetEntries(ir); - unspecialized.type = raw.GetType(); - unspecialized.code = raw.GetCode(); - unspecialized.code_b = raw.GetCodeB(); - unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); - - if (callback) { - callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); - } - } - return true; + return program; } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { @@ -648,17 +461,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const auto unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; - const ShaderParameters params{system, disk_cache, precompiled_variants, device, + const ShaderParameters params{system, disk_cache, device, cpu_addr, host_ptr, unique_identifier}; - const auto found = unspecialized_shaders.find(unique_identifier); - if (found == unspecialized_shaders.end()) { + const auto found = runtime_cache.find(unique_identifier); + if (found == runtime_cache.end()) { shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); } else { - shader = CachedShader::CreateFromCache(params, found->second); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); } Register(shader); @@ -673,19 +486,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { return kernel; } - // No kernel found - create a new one + // No kernel found, create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; - const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; - const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); + const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{system, disk_cache, precompiled_variants, device, + const ShaderParameters params{system, disk_cache, device, cpu_addr, host_ptr, unique_identifier}; - const auto found = unspecialized_shaders.find(unique_identifier); - if (found == unspecialized_shaders.end()) { + const auto found = runtime_cache.find(unique_identifier); + if (found == runtime_cache.end()) { kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); } else { - kernel = CachedShader::CreateFromCache(params, found->second); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); } Register(kernel); -- cgit v1.2.3