From 114d6b2f97eb62c7d8c958ebb391b70b026130f9 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 01:21:16 -0800 Subject: VideoCore/Shader: Split interpreter and JIT into separate ShaderEngines --- src/video_core/CMakeLists.txt | 2 + src/video_core/pica.cpp | 2 +- src/video_core/shader/shader.cpp | 92 ++++------------------------ src/video_core/shader/shader.h | 5 +- src/video_core/shader/shader_interpreter.cpp | 39 ++++++++++-- src/video_core/shader/shader_interpreter.h | 19 +++--- src/video_core/shader/shader_jit_x64.cpp | 56 +++++++++++++++++ src/video_core/shader/shader_jit_x64.h | 35 +++++++++++ 8 files changed, 153 insertions(+), 97 deletions(-) create mode 100644 src/video_core/shader/shader_jit_x64.cpp create mode 100644 src/video_core/shader/shader_jit_x64.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 36397cce9..d55b84ce0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -50,9 +50,11 @@ set(HEADERS if(ARCHITECTURE_x86_64) set(SRCS ${SRCS} + shader/shader_jit_x64.cpp shader/shader_jit_x64_compiler.cpp) set(HEADERS ${HEADERS} + shader/shader_jit_x64.h shader/shader_jit_x64_compiler.h) endif() diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index ce2bd455e..b4a77c632 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -499,7 +499,7 @@ void Init() { } void Shutdown() { - Shader::ClearCache(); + Shader::Shutdown(); } template diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 97c6519d6..b30dae476 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -2,14 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include #include -#include -#include -#include -#include "common/bit_field.h" -#include "common/hash.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "video_core/pica.h" @@ -17,7 +11,7 @@ #include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" #ifdef ARCHITECTURE_x86_64 -#include "video_core/shader/shader_jit_x64_compiler.h" +#include "video_core/shader/shader_jit_x64.h" #endif // ARCHITECTURE_x86_64 #include "video_core/video_core.h" @@ -87,93 +81,33 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { conditional_code[1] = false; } -class MergedShaderEngine : public ShaderEngine { -public: - void SetupBatch(const ShaderSetup* setup) override; - void Run(UnitState& state, unsigned int entry_point) const override; - DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const override; - -private: - const ShaderSetup* setup = nullptr; -}; - -#ifdef ARCHITECTURE_x86_64 -static std::unordered_map> shader_map; -static const JitShader* jit_shader; -#endif // ARCHITECTURE_x86_64 +MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void ClearCache() { #ifdef ARCHITECTURE_x86_64 - shader_map.clear(); +static std::unique_ptr jit_engine; #endif // ARCHITECTURE_x86_64 -} - -void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) { - setup = setup_; - if (setup == nullptr) - return; +static InterpreterEngine interpreter_engine; +ShaderEngine* GetEngine() { #ifdef ARCHITECTURE_x86_64 + // TODO(yuriks): Re-initialize on each change rather than being persistent if (VideoCore::g_shader_jit_enabled) { - u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); - u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); - - u64 cache_key = code_hash ^ swizzle_hash; - auto iter = shader_map.find(cache_key); - if (iter != shader_map.end()) { - jit_shader = iter->second.get(); - } else { - auto shader = std::make_unique(); - shader->Compile(); - jit_shader = shader.get(); - shader_map[cache_key] = std::move(shader); + if (jit_engine == nullptr) { + jit_engine = std::make_unique(); } + return jit_engine.get(); } #endif // ARCHITECTURE_x86_64 -} - -MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); - -void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const { - ASSERT(setup != nullptr); - ASSERT(entry_point < 1024); - MICROPROFILE_SCOPE(GPU_Shader); + return &interpreter_engine; +} +void Shutdown() { #ifdef ARCHITECTURE_x86_64 - if (VideoCore::g_shader_jit_enabled) { - jit_shader->Run(*setup, state, entry_point); - } else { - DebugData dummy_debug_data; - RunInterpreter(*setup, state, dummy_debug_data, entry_point); - } -#else - DebugData dummy_debug_data; - RunInterpreter(*setup, state, dummy_debug_data, entry_point); + jit_engine = nullptr; #endif // ARCHITECTURE_x86_64 } -DebugData MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const { - ASSERT(setup != nullptr); - ASSERT(entry_point < 1024); - - UnitState state; - DebugData debug_data; - - // Setup input register table - boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); - state.LoadInputVertex(input, num_attributes); - RunInterpreter(*setup, state, debug_data, entry_point); - return debug_data; -} - -ShaderEngine* GetEngine() { - static MergedShaderEngine merged_engine; - return &merged_engine; -} - } // namespace Shader } // namespace Pica diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 899fb2607..2afd1024f 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -6,7 +6,6 @@ #include #include -#include #include #include #include "common/assert.h" @@ -152,9 +151,6 @@ struct UnitState { void LoadInputVertex(const InputVertex& input, int num_attributes); }; -/// Clears the shader cache -void ClearCache(); - struct ShaderSetup { struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are @@ -210,6 +206,7 @@ public: // TODO(yuriks): Remove and make it non-global state somewhere ShaderEngine* GetEngine(); +void Shutdown(); } // namespace Shader diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 20fb9754b..8e2b8c548 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -7,10 +7,12 @@ #include #include #include +#include #include #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/vector_math.h" #include "video_core/pica_state.h" #include "video_core/pica_types.h" @@ -37,8 +39,8 @@ struct CallStackElement { }; template -void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, - unsigned offset) { +static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, + unsigned offset) { // TODO: Is there a maximal size for this? boost::container::static_vector call_stack; u32 program_counter = offset; @@ -647,9 +649,36 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } } -// Explicit instantiation -template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData&, unsigned offset); -template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData&, unsigned offset); +void InterpreterEngine::SetupBatch(const ShaderSetup* setup_) { + setup = setup_; +} + +MICROPROFILE_DECLARE(GPU_Shader); + +void InterpreterEngine::Run(UnitState& state, unsigned int entry_point) const { + ASSERT(setup != nullptr); + ASSERT(entry_point < 1024); + + MICROPROFILE_SCOPE(GPU_Shader); + + DebugData dummy_debug_data; + RunInterpreter(*setup, state, dummy_debug_data, entry_point); +} + +DebugData InterpreterEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const { + ASSERT(setup != nullptr); + ASSERT(entry_point < 1024); + + UnitState state; + DebugData debug_data; + + // Setup input register table + boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); + state.LoadInputVertex(input, num_attributes); + RunInterpreter(*setup, state, debug_data, entry_point); + return debug_data; +} } // namespace diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 3237b50b3..43c1ed5ea 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -4,19 +4,22 @@ #pragma once +#include "video_core/shader/shader.h" + namespace Pica { namespace Shader { -struct ShaderSetup; -struct UnitState; - -template -struct DebugData; +class InterpreterEngine final : public ShaderEngine { +public: + void SetupBatch(const ShaderSetup* setup) override; + void Run(UnitState& state, unsigned int entry_point) const override; + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const override; -template -void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, - unsigned offset); +private: + const ShaderSetup* setup = nullptr; +}; } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp new file mode 100644 index 000000000..fea79538a --- /dev/null +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -0,0 +1,56 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/hash.h" +#include "common/microprofile.h" +#include "video_core/shader/shader.h" +#include "video_core/shader/shader_jit_x64.h" +#include "video_core/shader/shader_jit_x64_compiler.h" + +namespace Pica { +namespace Shader { + +JitX64Engine::JitX64Engine() = default; +JitX64Engine::~JitX64Engine() = default; + +void JitX64Engine::SetupBatch(const ShaderSetup* setup_) { + cached_shader = nullptr; + setup = setup_; + if (setup == nullptr) + return; + + u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); + u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); + + u64 cache_key = code_hash ^ swizzle_hash; + auto iter = cache.find(cache_key); + if (iter != cache.end()) { + cached_shader = iter->second.get(); + } else { + auto shader = std::make_unique(); + shader->Compile(); + cached_shader = shader.get(); + cache.emplace_hint(iter, cache_key, std::move(shader)); + } +} + +MICROPROFILE_DECLARE(GPU_Shader); + +void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const { + ASSERT(setup != nullptr); + ASSERT(cached_shader != nullptr); + ASSERT(entry_point < 1024); + + MICROPROFILE_SCOPE(GPU_Shader); + + cached_shader->Run(*setup, state, entry_point); +} + +DebugData JitX64Engine::ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const { + UNIMPLEMENTED_MSG("Shader tracing/debugging is not supported by the JIT."); +} + +} // namespace Shader +} // namespace Pica diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h new file mode 100644 index 000000000..df18de2c2 --- /dev/null +++ b/src/video_core/shader/shader_jit_x64.h @@ -0,0 +1,35 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/common_types.h" +#include "video_core/shader/shader.h" + +namespace Pica { +namespace Shader { + +class JitShader; + +class JitX64Engine final : public ShaderEngine { +public: + JitX64Engine(); + ~JitX64Engine() override; + + void SetupBatch(const ShaderSetup* setup) override; + void Run(UnitState& state, unsigned int entry_point) const override; + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const override; + +private: + const ShaderSetup* setup = nullptr; + + std::unordered_map> cache; + const JitShader* cached_shader = nullptr; +}; + +} // namespace Shader +} // namespace Pica -- cgit v1.2.3