From 2ef696c85a37917102a9f869775180ab225f0d56 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 11:15:09 -0400 Subject: Shader_IR: Implement BRX tracking. --- src/video_core/shader/control_flow.cpp | 113 +++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 9d21f45de..70f758642 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -124,6 +124,111 @@ enum class ParseResult : u32 { AbnormalFlow, }; +struct BranchIndirectInfo { + u32 buffer{}; + u32 offset{}; + u32 entries{}; + s32 relative_position{}; +}; + +std::optional TrackBranchIndirectInfo(const CFGRebuildState& state, + u32 start_address, u32 current_position) { + const u32 shader_start = state.start; + u32 pos = current_position; + BranchIndirectInfo result{}; + u64 track_register = 0; + + // Step 0 Get BRX Info + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() != OpCode::Id::BRX) { + return {}; + } + if (instr.brx.constant_buffer != 0) { + return {}; + } + track_register = instr.gpr8.Value(); + result.relative_position = instr.brx.GetBranchExtend(); + pos--; + bool found_track = false; + + // Step 1 Track LDC + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::LD_C) { + if (instr.gpr0.Value() == track_register && + instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { + result.buffer = instr.cbuf36.index; + result.offset = instr.cbuf36.GetOffset(); + track_register = instr.gpr8.Value(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return {}; + } + found_track = false; + + // Step 2 Track SHL + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { + if (instr.gpr0.Value() == track_register) { + track_register = instr.gpr8.Value(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return {}; + } + found_track = false; + + // Step 3 Track IMNMX + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { + if (instr.gpr0.Value() == track_register) { + track_register = instr.gpr8.Value(); + result.entries = instr.alu.GetSignedImm20_20(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return {}; + } + return {result}; +} + std::pair ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast(address); const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); @@ -298,6 +403,14 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) break; } case OpCode::Id::BRX: { + auto tmp = TrackBranchIndirectInfo(state, address, offset); + if (tmp) { + auto result = *tmp; + LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}", + result.buffer, result.offset, result.entries); + } else { + LOG_CRITICAL(HW_GPU, "Track Unsuccesful"); + } return {ParseResult::AbnormalFlow, parse_info}; } default: -- cgit v1.2.3 From 1a58f45d76fe7756dd365e099d1536da769c1eab Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 14:02:02 -0400 Subject: VideoCore: Unify const buffer accessing along engines and provide ConstBufferLocker class to shaders. --- src/video_core/shader/const_buffer_locker.cpp | 72 +++++++++++++++++++++++++++ src/video_core/shader/const_buffer_locker.h | 50 +++++++++++++++++++ src/video_core/shader/shader_ir.h | 1 + 3 files changed, 123 insertions(+) create mode 100644 src/video_core/shader/const_buffer_locker.cpp create mode 100644 src/video_core/shader/const_buffer_locker.h (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp new file mode 100644 index 000000000..6a9e0ed5e --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -0,0 +1,72 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/shader/const_buffer_locker.h" + +namespace VideoCommon::Shader { + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) + : engine{nullptr}, shader_stage{shader_stage} {} + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface* engine) + : engine{engine}, shader_stage{shader_stage} {} + +bool ConstBufferLocker::IsEngineSet() const { + return engine != nullptr; +} + +void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine_) { + engine = engine_; +} + +std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return {iter->second}; + } + if (!IsEngineSet()) { + return {}; + } + const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); + keys.emplace(key, value); + return {value}; +} + +void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { + const std::pair key = {buffer, offset}; + keys[key] = value; +} + +u32 ConstBufferLocker::NumKeys() const { + return keys.size(); +} + +const std::unordered_map, u32, Common::PairHash>& +ConstBufferLocker::AccessKeys() const { + return keys; +} + +bool ConstBufferLocker::AreKeysConsistant() const { + if (!IsEngineSet()) { + return false; + } + for (const auto& key_val : keys) { + const std::pair key = key_val.first; + const u32 value = key_val.second; + const u32 other_value = engine->AccessConstBuffer32(shader_stage, key.first, key.second); + if (other_value != value) { + return false; + } + } + return true; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h new file mode 100644 index 000000000..39e62584d --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.h @@ -0,0 +1,50 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" + +namespace VideoCommon::Shader { + +class ConstBufferLocker { +public: + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); + + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface* engine); + + // Checks if an engine is setup, it may be possible that during disk shader + // cache run, the engines have not been created yet. + bool IsEngineSet() const; + + // Use this to set/change the engine used for this shader. + void SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine); + + // Retrieves a key from the locker, if it's registered, it will give the + // registered value, if not it will obtain it from maxwell3d and register it. + std::optional ObtainKey(u32 buffer, u32 offset); + + // Manually inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + // Retrieves the number of keys registered. + u32 NumKeys() const; + + // Gives an accessor to the key's database. + const std::unordered_map, u32, Common::PairHash>& AccessKeys() const; + + // Checks keys against maxwell3d's current const buffers. Returns true if they + // are the same value, false otherwise; + bool AreKeysConsistant() const; + +private: + Tegra::Engines::ConstBufferEngineInterface* engine; + Tegra::Engines::ShaderType shader_stage; + std::unordered_map, u32, Common::PairHash> keys{}; +}; +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 91cd0a534..68818643c 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -17,6 +17,7 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" namespace VideoCommon::Shader { -- cgit v1.2.3 From acd64411342e70bd7e9f7156f62c3b1a609ac3c4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 15:40:58 -0400 Subject: Shader_Cache: setup connection of ConstBufferLocker --- src/video_core/shader/control_flow.cpp | 22 +++++++++++++++------- src/video_core/shader/control_flow.h | 3 ++- src/video_core/shader/decode.cpp | 2 +- src/video_core/shader/shader_ir.cpp | 4 ++-- src/video_core/shader/shader_ir.h | 3 ++- 5 files changed, 22 insertions(+), 12 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 70f758642..dac2e4272 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -57,8 +57,8 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start) - : start{start}, program_code{program_code}, program_size{program_size} {} + const u32 start, ConstBufferLocker& locker) + : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {} u32 start{}; std::vector block_info{}; @@ -72,6 +72,7 @@ struct CFGRebuildState { const ProgramCode& program_code; const std::size_t program_size; ASTManager* manager; + ConstBufferLocker& locker; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -214,7 +215,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { if (instr.gpr0.Value() == track_register) { track_register = instr.gpr8.Value(); - result.entries = instr.alu.GetSignedImm20_20(); + result.entries = instr.alu.GetSignedImm20_20() + 1; pos--; found_track = true; break; @@ -406,8 +407,14 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) auto tmp = TrackBranchIndirectInfo(state, address, offset); if (tmp) { auto result = *tmp; - LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}", - result.buffer, result.offset, result.entries); + std::string entries{}; + for (u32 i = 0; i < result.entries; i++) { + auto k = locker.ObtainKey(result.buffer, result.offset + i * 4); + entries = entries + std::to_string(*k) + '\n'; + } + LOG_CRITICAL(HW_GPU, + "Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}", + result.buffer, result.offset, result.entries, entries); } else { LOG_CRITICAL(HW_GPU, "Track Unsuccesful"); } @@ -588,14 +595,15 @@ void DecompileShader(CFGRebuildState& state) { std::unique_ptr ScanFlow(const ProgramCode& program_code, std::size_t program_size, u32 start_address, - const CompilerSettings& settings) { + const CompilerSettings& settings, + ConstBufferLocker& locker) { auto result_out = std::make_unique(); if (settings.depth == CompileDepth::BruteForce) { result_out->settings.depth = CompileDepth::BruteForce; return result_out; } - CFGRebuildState state{program_code, program_size, start_address}; + CFGRebuildState state{program_code, program_size, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 37e987d62..6d0e50d7c 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -78,6 +78,7 @@ struct ShaderCharacteristics { std::unique_ptr ScanFlow(const ProgramCode& program_code, std::size_t program_size, u32 start_address, - const CompilerSettings& settings); + const CompilerSettings& settings, + ConstBufferLocker& locker); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2626b1616..3f87b87ca 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -102,7 +102,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, program_size, main_offset, settings); + auto info = ScanFlow(program_code, program_size, main_offset, settings, locker); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index c1f2b88c8..6430575ec 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -23,9 +23,9 @@ using Tegra::Shader::PredOperation; using Tegra::Shader::Register; ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, - CompilerSettings settings) + CompilerSettings settings, ConstBufferLocker& locker) : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, - program_manager{true, true}, settings{settings} { + program_manager{true, true}, settings{settings}, locker{locker} { Decode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 68818643c..e3b568d3e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -68,7 +68,7 @@ struct GlobalMemoryUsage { class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, - CompilerSettings settings); + CompilerSettings settings, ConstBufferLocker& locker); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -389,6 +389,7 @@ private: NodeBlock global_code; ASTManager program_manager; CompilerSettings settings{}; + ConstBufferLocker& locker; std::set used_registers; std::set used_predicates; -- cgit v1.2.3 From 8909f52166bf9c27d52b5a722efbd46d1a11e876 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 22:55:25 -0400 Subject: Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. --- src/video_core/shader/ast.cpp | 4 + src/video_core/shader/control_flow.cpp | 262 ++++++++++++++++++++------------- src/video_core/shader/control_flow.h | 59 ++++++-- src/video_core/shader/decode.cpp | 34 +++-- src/video_core/shader/expr.h | 17 ++- 5 files changed, 246 insertions(+), 130 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index e43aecc18..2fa3a3f7d 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -228,6 +228,10 @@ public: inner += expr.value ? "true" : "false"; } + void operator()(ExprGprEqual const& expr) { + inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; + } + const std::string& GetResult() const { return inner; } diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index dac2e4272..d1c269ea7 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -35,14 +35,24 @@ struct BlockStack { std::stack pbk_stack{}; }; -struct BlockBranchInfo { - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; +template +BlockBranchInfo MakeBranchInfo(Args&&... args) { + static_assert(std::is_convertible_v); + return std::make_shared(T(std::forward(args)...)); +} + +bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second) { + return false; //(*first) == (*second); +} + +bool BlockBranchIsIgnored(BlockBranchInfo first) { + bool ignore = false; + if (std::holds_alternative(*first)) { + auto branch = std::get_if(first.get()); + ignore = branch->ignore; + } + return ignore; +} struct BlockInfo { u32 start{}; @@ -234,6 +244,7 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) u32 offset = static_cast(address); const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); ParseInfo parse_info{}; + SingleBranch single_branch{}; const auto insert_label = [](CFGRebuildState& state, u32 address) { const auto pair = state.labels.emplace(address); @@ -246,13 +257,14 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) if (offset >= end_address) { // ASSERT_OR_EXECUTE can't be used, as it ignores the break ASSERT_MSG(false, "Shader passed the current limit!"); - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.ignore = false; + + single_branch.address = exit_branch; + single_branch.ignore = false; break; } if (state.registered.count(offset) != 0) { - parse_info.branch_info.address = offset; - parse_info.branch_info.ignore = true; + single_branch.address = offset; + single_branch.ignore = true; break; } if (IsSchedInstruction(offset, state.start)) { @@ -269,24 +281,26 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) switch (opcode->get().GetId()) { case OpCode::Id::EXIT: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -295,99 +309,107 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) return {ParseResult::AbnormalFlow, parse_info}; } const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } const u32 branch_offset = offset + instr.bra.GetBranchTarget(); if (branch_offset == 0) { - parse_info.branch_info.address = exit_branch; + single_branch.address = exit_branch; } else { - parse_info.branch_info.address = branch_offset; + single_branch.address = branch_offset; } insert_label(state, branch_offset); - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::SYNC: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = true; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = true; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::BRK: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = true; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = true; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::KIL: { const auto pred_index = static_cast(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = true; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = true; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -407,16 +429,25 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) auto tmp = TrackBranchIndirectInfo(state, address, offset); if (tmp) { auto result = *tmp; - std::string entries{}; + std::vector branches{}; + s32 pc_target = offset + result.relative_position; for (u32 i = 0; i < result.entries; i++) { - auto k = locker.ObtainKey(result.buffer, result.offset + i * 4); - entries = entries + std::to_string(*k) + '\n'; + auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + if (!k) { + return {ParseResult::AbnormalFlow, parse_info}; + } + u32 value = *k; + u32 target = static_cast((value >> 3) + pc_target); + insert_label(state, target); + branches.emplace_back(value, target); } - LOG_CRITICAL(HW_GPU, - "Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}", - result.buffer, result.offset, result.entries, entries); + parse_info.end_address = offset; + parse_info.branch_info = + MakeBranchInfo(static_cast(instr.gpr8.Value()), branches); + + return {ParseResult::ControlCaught, parse_info}; } else { - LOG_CRITICAL(HW_GPU, "Track Unsuccesful"); + LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); } return {ParseResult::AbnormalFlow, parse_info}; } @@ -426,10 +457,13 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) offset++; } - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; parse_info.end_address = offset - 1; + parse_info.branch_info = MakeBranchInfo( + single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, + single_branch.is_brk, single_branch.ignore); return {ParseResult::BlockEnd, parse_info}; } @@ -453,9 +487,10 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; new_block.branch = current_block.branch; - BlockBranchInfo forward_branch{}; - forward_branch.address = address; - forward_branch.ignore = true; + BlockBranchInfo forward_branch = MakeBranchInfo(); + auto branch = std::get_if(forward_branch.get()); + branch->address = address; + branch->ignore = true; current_block.branch = forward_branch; return true; } @@ -470,12 +505,15 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info.branch = parse_info.branch_info; - if (parse_info.branch_info.condition.IsUnconditional()) { + if (std::holds_alternative(*block_info.branch)) { + auto branch = std::get_if(block_info.branch.get()); + if (branch->condition.IsUnconditional()) { + return true; + } + const u32 fallthrough_address = parse_info.end_address + 1; + state.inspect_queries.push_front(fallthrough_address); return true; } - - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); return true; } @@ -513,31 +551,41 @@ bool TryQuery(CFGRebuildState& state) { state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (!block.branch.condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } + if (std::holds_alternative(*block.branch)) { + auto branch = std::get_if(block.branch.get()); + if (!branch->condition.IsUnconditional()) { + q2.address = block.end + 1; + state.queries.push_back(q2); + } - Query conditional_query{q2}; - if (block.branch.is_sync) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.ssy_stack.top(); + Query conditional_query{q2}; + if (branch->is_sync) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.ssy_stack.top(); + } + conditional_query.ssy_stack.pop(); } - conditional_query.ssy_stack.pop(); - } - if (block.branch.is_brk) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.pbk_stack.top(); + if (branch->is_brk) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.pbk_stack.top(); + } + conditional_query.pbk_stack.pop(); } - conditional_query.pbk_stack.pop(); + conditional_query.address = branch->address; + state.queries.push_back(std::move(conditional_query)); + return true; + } + auto multi_branch = std::get_if(block.branch.get()); + for (auto& branch_case : multi_branch->branches) { + Query conditional_query{q2}; + conditional_query.address = branch_case.address; + state.queries.push_back(std::move(conditional_query)); } - conditional_query.address = block.branch.address; - state.queries.push_back(std::move(conditional_query)); return true; } } // Anonymous namespace -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { +void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { const auto get_expr = ([&](const Condition& cond) -> Expr { Expr result{}; if (cond.cc != ConditionCode::T) { @@ -564,15 +612,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { } return MakeExpr(true); }); - if (branch.address < 0) { - if (branch.kill) { - mm.InsertReturn(get_expr(branch.condition), true); + if (std::holds_alternative(*branch_info)) { + auto branch = std::get_if(branch_info.get()); + if (branch->address < 0) { + if (branch->kill) { + mm.InsertReturn(get_expr(branch->condition), true); + return; + } + mm.InsertReturn(get_expr(branch->condition), false); return; } - mm.InsertReturn(get_expr(branch.condition), false); + mm.InsertGoto(get_expr(branch->condition), branch->address); return; } - mm.InsertGoto(get_expr(branch.condition), branch.address); + auto multi_branch = std::get_if(branch_info.get()); + for (auto& branch_case : multi_branch->branches) { + mm.InsertGoto(MakeExpr(multi_branch->gpr, branch_case.cmp_value), + branch_case.address); + } } void DecompileShader(CFGRebuildState& state) { @@ -584,9 +641,10 @@ void DecompileShader(CFGRebuildState& state) { if (state.labels.count(block.start) != 0) { state.manager->InsertLabel(block.start); } - u32 end = block.branch.ignore ? block.end + 1 : block.end; + const bool ignore = BlockBranchIsIgnored(block.branch); + u32 end = ignore ? block.end + 1 : block.end; state.manager->InsertBlock(block.start, end); - if (!block.branch.ignore) { + if (!ignore) { InsertBranch(*state.manager, block.branch); } } @@ -668,11 +726,9 @@ std::unique_ptr ScanFlow(const ProgramCode& program_code, ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; - new_block.ignore_branch = block.branch.ignore; + new_block.ignore_branch = BlockBranchIsIgnored(block.branch); if (!new_block.ignore_branch) { - new_block.branch.cond = block.branch.condition; - new_block.branch.kills = block.branch.kill; - new_block.branch.address = block.branch.address; + new_block.branch = block.branch; } result_out->end = std::max(result_out->end, block.end); result_out->blocks.push_back(new_block); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 6d0e50d7c..369ca255b 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/ast.h" @@ -37,29 +38,57 @@ struct Condition { } }; -struct ShaderBlock { - struct Branch { - Condition cond{}; - bool kills{}; - s32 address{}; +class SingleBranch { +public: + SingleBranch() = default; + SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, + bool ignore) + : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, + ignore{ignore} {} + + bool operator==(const SingleBranch& b) const { + return std::tie(condition, address, kill, is_sync, is_brk, ignore) == + std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); + } + + Condition condition{}; + s32 address{exit_branch}; + bool kill{}; + bool is_sync{}; + bool is_brk{}; + bool ignore{}; +}; + +struct CaseBranch { + CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} + u32 cmp_value; + u32 address; +}; + +class MultiBranch { +public: + MultiBranch(u32 gpr, std::vector& branches) + : gpr{gpr}, branches{std::move(branches)} {} - bool operator==(const Branch& b) const { - return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); - } + u32 gpr{}; + std::vector branches{}; +}; - bool operator!=(const Branch& b) const { - return !operator==(b); - } - }; +using BranchData = std::variant; +using BlockBranchInfo = std::shared_ptr; +bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); + +struct ShaderBlock { u32 start{}; u32 end{}; bool ignore_branch{}; - Branch branch{}; + BlockBranchInfo branch{}; bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch, branch) == - std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); + return std::tie(start, end, ignore_branch) == + std::tie(sb.start, sb.end, sb.ignore_branch) && + BlockBranchInfoAreEqual(branch, sb.branch); } bool operator!=(const ShaderBlock& sb) const { diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 3f87b87ca..053241128 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -198,24 +198,38 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { } return result; }; - if (block.branch.address < 0) { - if (block.branch.kills) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(block.branch.cond, n); + if (std::holds_alternative(*block.branch)) { + auto branch = std::get_if(block.branch.get()); + if (branch->address < 0) { + if (branch->kill) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(branch->condition, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(block.branch.cond, n); + Node n = Operation(OperationCode::Branch, Immediate(branch->address)); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); - n = apply_conditions(block.branch.cond, n); - bb.push_back(n); - global_code.push_back(n); + auto multi_branch = std::get_if(block.branch.get()); + Node op_a = GetRegister(multi_branch->gpr); + for (auto& branch_case : multi_branch->branches) { + Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); + Node op_b = Immediate(branch_case.cmp_value); + Node condition = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + auto result = Conditional(condition, {n}); + bb.push_back(result); + global_code.push_back(result); + } } u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index d3dcd00ec..e41d23e93 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h @@ -17,13 +17,14 @@ using Tegra::Shader::Pred; class ExprAnd; class ExprBoolean; class ExprCondCode; +class ExprGprEqual; class ExprNot; class ExprOr; class ExprPredicate; class ExprVar; -using ExprData = - std::variant; +using ExprData = std::variant; using Expr = std::shared_ptr; class ExprAnd final { @@ -118,6 +119,18 @@ public: bool value; }; +class ExprGprEqual final { +public: + ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} + + bool operator==(const ExprGprEqual& b) const { + return gpr == b.gpr && value == b.value; + } + + u32 gpr; + u32 value; +}; + template Expr MakeExpr(Args&&... args) { static_assert(std::is_convertible_v); -- cgit v1.2.3 From 33fcec3502f5dd5a99b7a8337128b7c99bfba908 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 09:53:18 -0400 Subject: Shader_IR: allow lookup of texture samplers within the shader_ir for instructions that don't provide it --- src/video_core/shader/const_buffer_locker.cpp | 110 ++++++++++++++++++++++---- src/video_core/shader/const_buffer_locker.h | 60 ++++++++++++-- src/video_core/shader/decode/texture.cpp | 72 ++++++++++++----- src/video_core/shader/shader_ir.h | 12 ++- 4 files changed, 212 insertions(+), 42 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 6a9e0ed5e..4f5de8ae9 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -27,43 +27,121 @@ void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* en } std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { + if (!keys) { + keys = std::make_shared(); + } + auto& key_map = *keys; const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { + const auto iter = key_map.find(key); + if (iter != key_map.end()) { return {iter->second}; } if (!IsEngineSet()) { return {}; } const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); - keys.emplace(key, value); + key_map.emplace(key, value); + return {value}; +} + +std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { + if (!bound_samplers) { + bound_samplers = std::make_shared(); + } + auto& key_map = *bound_samplers; + const u32 key = offset; + const auto iter = key_map.find(key); + if (iter != key_map.end()) { + return {iter->second}; + } + if (!IsEngineSet()) { + return {}; + } + const Tegra::Engines::SamplerDescriptor value = + engine->AccessBoundSampler(shader_stage, offset); + key_map.emplace(key, value); + return {value}; +} + +std::optional ConstBufferLocker::ObtainBindlessSampler( + u32 buffer, u32 offset) { + if (!bindless_samplers) { + bindless_samplers = std::make_shared(); + } + auto& key_map = *bindless_samplers; + const std::pair key = {buffer, offset}; + const auto iter = key_map.find(key); + if (iter != key_map.end()) { + return {iter->second}; + } + if (!IsEngineSet()) { + return {}; + } + const Tegra::Engines::SamplerDescriptor value = + engine->AccessBindlessSampler(shader_stage, buffer, offset); + key_map.emplace(key, value); return {value}; } void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { + if (!keys) { + keys = std::make_shared(); + } const std::pair key = {buffer, offset}; - keys[key] = value; + (*keys)[key] = value; } -u32 ConstBufferLocker::NumKeys() const { - return keys.size(); +void ConstBufferLocker::InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler) { + if (!bound_samplers) { + bound_samplers = std::make_shared(); + } + (*bound_samplers)[offset] = sampler; } -const std::unordered_map, u32, Common::PairHash>& -ConstBufferLocker::AccessKeys() const { - return keys; +void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, + Tegra::Engines::SamplerDescriptor sampler) { + if (!bindless_samplers) { + bindless_samplers = std::make_shared(); + } + const std::pair key = {buffer, offset}; + (*bindless_samplers)[key] = sampler; } -bool ConstBufferLocker::AreKeysConsistant() const { +bool ConstBufferLocker::IsConsistant() const { if (!IsEngineSet()) { return false; } - for (const auto& key_val : keys) { - const std::pair key = key_val.first; - const u32 value = key_val.second; - const u32 other_value = engine->AccessConstBuffer32(shader_stage, key.first, key.second); - if (other_value != value) { - return false; + if (keys) { + for (const auto& key_val : *keys) { + const std::pair key = key_val.first; + const u32 value = key_val.second; + const u32 other_value = + engine->AccessConstBuffer32(shader_stage, key.first, key.second); + if (other_value != value) { + return false; + } + } + } + if (bound_samplers) { + for (const auto& sampler_val : *bound_samplers) { + const u32 key = sampler_val.first; + const Tegra::Engines::SamplerDescriptor value = sampler_val.second; + const Tegra::Engines::SamplerDescriptor other_value = + engine->AccessBoundSampler(shader_stage, key); + if (other_value.raw != value.raw) { + return false; + } + } + } + if (bindless_samplers) { + for (const auto& sampler_val : *bindless_samplers) { + const std::pair key = sampler_val.first; + const Tegra::Engines::SamplerDescriptor value = sampler_val.second; + const Tegra::Engines::SamplerDescriptor other_value = + engine->AccessBindlessSampler(shader_stage, key.first, key.second); + if (other_value.raw != value.raw) { + return false; + } } } return true; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 39e62584d..0bc257781 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -11,6 +11,11 @@ namespace VideoCommon::Shader { +using KeyMap = std::unordered_map, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map; +using BindlessSamplerMap = + std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + class ConstBufferLocker { public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); @@ -29,22 +34,67 @@ public: // registered value, if not it will obtain it from maxwell3d and register it. std::optional ObtainKey(u32 buffer, u32 offset); + std::optional ObtainBoundSampler(u32 offset); + + std::optional ObtainBindlessSampler(u32 buffer, u32 offset); + // Manually inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + // Retrieves the number of keys registered. - u32 NumKeys() const; + std::size_t NumKeys() const { + if (!keys) { + return 0; + } + return keys->size(); + } + + std::size_t NumBoundSamplers() const { + if (!bound_samplers) { + return 0; + } + return bound_samplers->size(); + } + + std::size_t NumBindlessSamplers() const { + if (!bindless_samplers) { + return 0; + } + return bindless_samplers->size(); + } // Gives an accessor to the key's database. - const std::unordered_map, u32, Common::PairHash>& AccessKeys() const; + // Pre: NumKeys > 0 + const KeyMap& AccessKeys() const { + return *keys; + } + + // Gives an accessor to the sampler's database. + // Pre: NumBindlessSamplers > 0 + const BoundSamplerMap& AccessBoundSamplers() const { + return *bound_samplers; + } + + // Gives an accessor to the sampler's database. + // Pre: NumBindlessSamplers > 0 + const BindlessSamplerMap& AccessBindlessSamplers() const { + return *bindless_samplers; + } - // Checks keys against maxwell3d's current const buffers. Returns true if they + // Checks keys & samplers against engine's current const buffers. Returns true if they // are the same value, false otherwise; - bool AreKeysConsistant() const; + bool IsConsistant() const; private: Tegra::Engines::ConstBufferEngineInterface* engine; Tegra::Engines::ShaderType shader_stage; - std::unordered_map, u32, Common::PairHash> keys{}; + // All containers are lazy initialized as most shaders don't use them. + std::shared_ptr keys{}; + std::shared_ptr bound_samplers{}; + std::shared_ptr bindless_samplers{}; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a069..c369e23ad 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Node component = Immediate(static_cast(instr.tld4s.component)); const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - is_bindless - ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, - false) - : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); u32 indexer = 0; switch (instr.txq.query_type) { @@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = is_bindless - ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) - : GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = + is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); std::vector coords; @@ -285,10 +282,30 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, + std::optional sampler_info) { const auto offset = static_cast(sampler.index.Value()); + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else { + auto sampler = locker.ObtainBoundSampler(offset); + if (sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } + } + // If this sampler has already been used, return the existing mapping. const auto itr = std::find_if(used_samplers.begin(), used_samplers.end(), @@ -305,13 +322,32 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, + std::optional sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_sampler, cbuf_index, cbuf_offset] = TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); ASSERT(base_sampler != nullptr); const auto cbuf_key = (static_cast(cbuf_index) << 32) | static_cast(cbuf_offset); + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else { + auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); + if (sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } + } // If this sampler has already been used, return the existing mapping. const auto itr = @@ -411,9 +447,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = is_bindless - ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) - : GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) + : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -577,7 +613,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de dc = GetRegister(parameter_register++); } - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -610,7 +646,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -646,7 +682,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e3b568d3e..3a3e381d2 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -173,6 +173,13 @@ public: private: friend class ASTDecoder; + + struct SamplerInfo { + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + }; + void Decode(); NodeBlock DecodeRange(u32 begin, u32 end); @@ -297,12 +304,11 @@ private: /// Accesses a texture sampler const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + std::optional sampler_info); // Accesses a texture sampler for a bindless texture. const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, - Tegra::Shader::TextureType type, bool is_array, - bool is_shadow); + std::optional sampler_info); /// Accesses an image. Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); -- cgit v1.2.3 From a05120ec0b8b1827ebeffd4e78a553f7886fa178 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 15:03:13 -0400 Subject: Shader_IR: Correct typo in Consistent method. --- src/video_core/shader/const_buffer_locker.cpp | 2 +- src/video_core/shader/const_buffer_locker.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 4f5de8ae9..9d23bcecf 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -107,7 +107,7 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, (*bindless_samplers)[key] = sampler; } -bool ConstBufferLocker::IsConsistant() const { +bool ConstBufferLocker::IsConsistent() const { if (!IsEngineSet()) { return false; } diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 0bc257781..13eeba320 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -87,7 +87,7 @@ public: // Checks keys & samplers against engine's current const buffers. Returns true if they // are the same value, false otherwise; - bool IsConsistant() const; + bool IsConsistent() const; private: Tegra::Engines::ConstBufferEngineInterface* engine; -- cgit v1.2.3 From 1244f2d368076aec61327ee1440c5efd9ae046d6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 23 Sep 2019 22:55:25 -0400 Subject: Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. --- src/video_core/shader/ast.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 2fa3a3f7d..3f96d9076 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -228,7 +228,7 @@ public: inner += expr.value ? "true" : "false"; } - void operator()(ExprGprEqual const& expr) { + void operator()(const ExprGprEqual& expr) { inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; } -- cgit v1.2.3 From 7b81ba4d8a9805f808fcc60a0905ac74d293b2ee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 24 Sep 2019 23:34:18 -0300 Subject: gl_shader_decompiler: Move entries to a separate function --- src/video_core/shader/const_buffer_locker.cpp | 8 ++++---- src/video_core/shader/const_buffer_locker.h | 4 ++-- src/video_core/shader/control_flow.cpp | 18 ++++++++---------- src/video_core/shader/control_flow.h | 3 +-- src/video_core/shader/decode.cpp | 9 +++++---- src/video_core/shader/shader_ir.cpp | 7 +++---- src/video_core/shader/shader_ir.h | 12 ++++++------ 7 files changed, 29 insertions(+), 32 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 9d23bcecf..37a0968a1 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -15,15 +15,15 @@ ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) : engine{nullptr}, shader_stage{shader_stage} {} ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface* engine) - : engine{engine}, shader_stage{shader_stage} {} + Tegra::Engines::ConstBufferEngineInterface& engine) + : engine{&engine}, shader_stage{shader_stage} {} bool ConstBufferLocker::IsEngineSet() const { return engine != nullptr; } -void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine_) { - engine = engine_; +void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine_) { + engine = &engine_; } std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 13eeba320..54459977f 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -21,14 +21,14 @@ public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface* engine); + Tegra::Engines::ConstBufferEngineInterface& engine); // Checks if an engine is setup, it may be possible that during disk shader // cache run, the engines have not been created yet. bool IsEngineSet() const; // Use this to set/change the engine used for this shader. - void SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine); + void SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine); // Retrieves a key from the locker, if it's registered, it will give the // registered value, if not it will obtain it from maxwell3d and register it. diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index d1c269ea7..6c698bcff 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -66,10 +66,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start, ConstBufferLocker& locker) - : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) + : program_code{program_code}, start{start}, locker{locker} {} + const ProgramCode& program_code; + ConstBufferLocker& locker; u32 start{}; std::vector block_info{}; std::list inspect_queries{}; @@ -79,10 +80,7 @@ struct CFGRebuildState { std::map ssy_labels{}; std::map pbk_labels{}; std::unordered_map stacks{}; - const ProgramCode& program_code; - const std::size_t program_size; ASTManager* manager; - ConstBufferLocker& locker; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -242,7 +240,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& std::pair ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); + const u32 end_address = static_cast(state.program_code.size()); ParseInfo parse_info{}; SingleBranch single_branch{}; @@ -583,6 +581,7 @@ bool TryQuery(CFGRebuildState& state) { } return true; } + } // Anonymous namespace void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { @@ -651,8 +650,7 @@ void DecompileShader(CFGRebuildState& state) { state.manager->Decompile(); } -std::unique_ptr ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address, +std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, ConstBufferLocker& locker) { auto result_out = std::make_unique(); @@ -661,7 +659,7 @@ std::unique_ptr ScanFlow(const ProgramCode& program_code, return result_out; } - CFGRebuildState state{program_code, program_size, start_address, locker}; + CFGRebuildState state{program_code, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 369ca255b..288ee68af 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -105,8 +105,7 @@ struct ShaderCharacteristics { CompilerSettings settings{}; }; -std::unique_ptr ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address, +std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, ConstBufferLocker& locker); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 053241128..e1afa4582 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -} // namespace +} // Anonymous namespace class ASTDecoder { public: @@ -102,7 +102,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, program_size, main_offset, settings, locker); + auto info = ScanFlow(program_code, main_offset, settings, locker); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -155,7 +155,7 @@ void ShaderIR::Decode() { [[fallthrough]]; case CompileDepth::BruteForce: { coverage_begin = main_offset; - const u32 shader_end = static_cast(program_size / sizeof(u64)); + const u32 shader_end = program_code.size(); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); @@ -225,7 +225,8 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { for (auto& branch_case : multi_branch->branches) { Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); Node op_b = Immediate(branch_case.cmp_value); - Node condition = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + Node condition = + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); auto result = Conditional(condition, {n}); bb.push_back(result); global_code.push_back(result); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 6430575ec..1d718ccc6 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -22,10 +22,9 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, - CompilerSettings settings, ConstBufferLocker& locker) - : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, - program_manager{true, true}, settings{settings}, locker{locker} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 3a3e381d2..3ebea91b9 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -67,8 +67,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, - CompilerSettings settings, ConstBufferLocker& locker); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -384,7 +384,9 @@ private: const ProgramCode& program_code; const u32 main_offset; - const std::size_t program_size; + const CompilerSettings settings; + ConstBufferLocker& locker; + bool decompiled{}; bool disable_flow_stack{}; @@ -393,9 +395,7 @@ private: std::map basic_blocks; NodeBlock global_code; - ASTManager program_manager; - CompilerSettings settings{}; - ConstBufferLocker& locker; + ASTManager program_manager{true, true}; std::set used_registers; std::set used_predicates; -- cgit v1.2.3 From fa2c297f3eddc718123767142dbc296270f58f7a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 25 Sep 2019 19:19:41 -0300 Subject: const_buffer_locker: Minor style changes --- src/video_core/shader/const_buffer_locker.cpp | 152 +++++++++----------------- src/video_core/shader/const_buffer_locker.h | 76 ++++--------- 2 files changed, 76 insertions(+), 152 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 37a0968a1..ebeba102d 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -4,6 +4,8 @@ #pragma once +#include +#include #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" @@ -11,140 +13,92 @@ namespace VideoCommon::Shader { +using Tegra::Engines::SamplerDescriptor; + ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) - : engine{nullptr}, shader_stage{shader_stage} {} + : stage{shader_stage} {} ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) - : engine{&engine}, shader_stage{shader_stage} {} - -bool ConstBufferLocker::IsEngineSet() const { - return engine != nullptr; -} - -void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine_) { - engine = &engine_; -} + : stage{shader_stage}, engine{&engine} {} std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { - if (!keys) { - keys = std::make_shared(); - } - auto& key_map = *keys; const std::pair key = {buffer, offset}; - const auto iter = key_map.find(key); - if (iter != key_map.end()) { - return {iter->second}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return iter->second; } - if (!IsEngineSet()) { + if (!engine) { return {}; } - const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); - key_map.emplace(key, value); - return {value}; + const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); + keys.emplace(key, value); + return value; } -std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { - if (!bound_samplers) { - bound_samplers = std::make_shared(); - } - auto& key_map = *bound_samplers; +std::optional ConstBufferLocker::ObtainBoundSampler(u32 offset) { const u32 key = offset; - const auto iter = key_map.find(key); - if (iter != key_map.end()) { - return {iter->second}; + const auto iter = bound_samplers.find(key); + if (iter != bound_samplers.end()) { + return iter->second; } - if (!IsEngineSet()) { + if (!engine) { return {}; } - const Tegra::Engines::SamplerDescriptor value = - engine->AccessBoundSampler(shader_stage, offset); - key_map.emplace(key, value); - return {value}; + const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); + bound_samplers.emplace(key, value); + return value; } std::optional ConstBufferLocker::ObtainBindlessSampler( u32 buffer, u32 offset) { - if (!bindless_samplers) { - bindless_samplers = std::make_shared(); - } - auto& key_map = *bindless_samplers; - const std::pair key = {buffer, offset}; - const auto iter = key_map.find(key); - if (iter != key_map.end()) { - return {iter->second}; + const std::pair key = {buffer, offset}; + const auto iter = bindless_samplers.find(key); + if (iter != bindless_samplers.end()) { + return iter->second; } - if (!IsEngineSet()) { + if (!engine) { return {}; } - const Tegra::Engines::SamplerDescriptor value = - engine->AccessBindlessSampler(shader_stage, buffer, offset); - key_map.emplace(key, value); - return {value}; + const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); + bindless_samplers.emplace(key, value); + return value; } void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { - if (!keys) { - keys = std::make_shared(); - } - const std::pair key = {buffer, offset}; - (*keys)[key] = value; + keys.insert_or_assign({buffer, offset}, value); } -void ConstBufferLocker::InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler) { - if (!bound_samplers) { - bound_samplers = std::make_shared(); - } - (*bound_samplers)[offset] = sampler; +void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { + bound_samplers.insert_or_assign(offset, sampler); } -void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, - Tegra::Engines::SamplerDescriptor sampler) { - if (!bindless_samplers) { - bindless_samplers = std::make_shared(); - } - const std::pair key = {buffer, offset}; - (*bindless_samplers)[key] = sampler; +void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { + bindless_samplers.insert_or_assign({buffer, offset}, sampler); } bool ConstBufferLocker::IsConsistent() const { - if (!IsEngineSet()) { + if (!engine) { return false; } - if (keys) { - for (const auto& key_val : *keys) { - const std::pair key = key_val.first; - const u32 value = key_val.second; - const u32 other_value = - engine->AccessConstBuffer32(shader_stage, key.first, key.second); - if (other_value != value) { - return false; - } - } - } - if (bound_samplers) { - for (const auto& sampler_val : *bound_samplers) { - const u32 key = sampler_val.first; - const Tegra::Engines::SamplerDescriptor value = sampler_val.second; - const Tegra::Engines::SamplerDescriptor other_value = - engine->AccessBoundSampler(shader_stage, key); - if (other_value.raw != value.raw) { - return false; - } - } - } - if (bindless_samplers) { - for (const auto& sampler_val : *bindless_samplers) { - const std::pair key = sampler_val.first; - const Tegra::Engines::SamplerDescriptor value = sampler_val.second; - const Tegra::Engines::SamplerDescriptor other_value = - engine->AccessBindlessSampler(shader_stage, key.first, key.second); - if (other_value.raw != value.raw) { - return false; - } - } - } - return true; + return std::all_of(keys.begin(), keys.end(), + [](const auto& key) { + const auto [value, other_value] = key.first; + return value == other_value; + }) && + std::all_of(bound_samplers.begin(), bound_samplers.end(), + [this](const auto& sampler) { + const auto [key, value] = sampler; + const auto other_value = engine->AccessBoundSampler(stage, key); + return value.raw == other_value.raw; + }) && + std::all_of( + bindless_samplers.begin(), bindless_samplers.end(), [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + const auto other_value = engine->AccessBindlessSampler(stage, cbuf, offset); + return value.raw == other_value.raw; + }); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 54459977f..417d5a16f 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -23,78 +23,48 @@ public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); - // Checks if an engine is setup, it may be possible that during disk shader - // cache run, the engines have not been created yet. - bool IsEngineSet() const; - - // Use this to set/change the engine used for this shader. - void SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine); - - // Retrieves a key from the locker, if it's registered, it will give the - // registered value, if not it will obtain it from maxwell3d and register it. + /// Retrieves a key from the locker, if it's registered, it will give the registered value, if + /// not it will obtain it from maxwell3d and register it. std::optional ObtainKey(u32 buffer, u32 offset); std::optional ObtainBoundSampler(u32 offset); std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - // Manually inserts a key. + /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); + /// Inserts a bound sampler key. void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - // Retrieves the number of keys registered. - std::size_t NumKeys() const { - if (!keys) { - return 0; - } - return keys->size(); - } - - std::size_t NumBoundSamplers() const { - if (!bound_samplers) { - return 0; - } - return bound_samplers->size(); - } - - std::size_t NumBindlessSamplers() const { - if (!bindless_samplers) { - return 0; - } - return bindless_samplers->size(); - } + /// Checks keys and samplers against engine's current const buffers. Returns true if they are + /// the same value, false otherwise; + bool IsConsistent() const; - // Gives an accessor to the key's database. - // Pre: NumKeys > 0 - const KeyMap& AccessKeys() const { - return *keys; + /// Gives an getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; } - // Gives an accessor to the sampler's database. - // Pre: NumBindlessSamplers > 0 - const BoundSamplerMap& AccessBoundSamplers() const { - return *bound_samplers; + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; } - // Gives an accessor to the sampler's database. - // Pre: NumBindlessSamplers > 0 - const BindlessSamplerMap& AccessBindlessSamplers() const { - return *bindless_samplers; + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; } - // Checks keys & samplers against engine's current const buffers. Returns true if they - // are the same value, false otherwise; - bool IsConsistent() const; - private: - Tegra::Engines::ConstBufferEngineInterface* engine; - Tegra::Engines::ShaderType shader_stage; - // All containers are lazy initialized as most shaders don't use them. - std::shared_ptr keys{}; - std::shared_ptr bound_samplers{}; - std::shared_ptr bindless_samplers{}; + const Tegra::Engines::ShaderType stage; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; }; + } // namespace VideoCommon::Shader -- cgit v1.2.3 From ec85648af3316d5e43c7b57fca55d0dad3d03f96 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 25 Sep 2019 21:46:34 -0300 Subject: gl_shader_disk_cache: Store and load fast BRX --- src/video_core/shader/const_buffer_locker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index ebeba102d..fda9e3c38 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -90,14 +90,14 @@ bool ConstBufferLocker::IsConsistent() const { [this](const auto& sampler) { const auto [key, value] = sampler; const auto other_value = engine->AccessBoundSampler(stage, key); - return value.raw == other_value.raw; + return value == other_value; }) && std::all_of( bindless_samplers.begin(), bindless_samplers.end(), [this](const auto& sampler) { const auto [cbuf, offset] = sampler.first; const auto value = sampler.second; const auto other_value = engine->AccessBindlessSampler(stage, cbuf, offset); - return value.raw == other_value.raw; + return value == other_value; }); } -- cgit v1.2.3 From 78f3e8a75792c976eb5bfa6df4c020d898642684 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 26 Sep 2019 00:23:08 -0300 Subject: gl_shader_cache: Implement locker variants invalidation --- src/video_core/shader/const_buffer_locker.cpp | 28 +++++++++++++++------------ src/video_core/shader/const_buffer_locker.h | 3 +++ 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index fda9e3c38..592bbf657 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -82,23 +82,27 @@ bool ConstBufferLocker::IsConsistent() const { return false; } return std::all_of(keys.begin(), keys.end(), - [](const auto& key) { - const auto [value, other_value] = key.first; - return value == other_value; + [this](const auto& pair) { + const auto [cbuf, offset] = pair.first; + const auto value = pair.second; + return value == engine->AccessConstBuffer32(stage, cbuf, offset); }) && std::all_of(bound_samplers.begin(), bound_samplers.end(), [this](const auto& sampler) { const auto [key, value] = sampler; - const auto other_value = engine->AccessBoundSampler(stage, key); - return value == other_value; + return value == engine->AccessBoundSampler(stage, key); }) && - std::all_of( - bindless_samplers.begin(), bindless_samplers.end(), [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - const auto other_value = engine->AccessBindlessSampler(stage, cbuf, offset); - return value == other_value; - }); + std::all_of(bindless_samplers.begin(), bindless_samplers.end(), + [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + return value == engine->AccessBindlessSampler(stage, cbuf, offset); + }); +} + +bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { + return keys == rhs.keys && bound_samplers == rhs.bound_samplers && + bindless_samplers == rhs.bindless_samplers; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 417d5a16f..966537fd6 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -44,6 +44,9 @@ public: /// the same value, false otherwise; bool IsConsistent() const; + /// Returns true if the keys are equal to the other ones in the locker. + bool HasEqualKeys(const ConstBufferLocker& rhs) const; + /// Gives an getter to the const buffer keys in the database. const KeyMap& GetKeys() const { return keys; -- cgit v1.2.3 From be856a38d6b0c7c90c861baf3204ac48a108f3d2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 17 Oct 2019 10:35:16 -0400 Subject: Shader_IR: Address Feedback. --- src/video_core/shader/const_buffer_locker.cpp | 8 +++-- src/video_core/shader/const_buffer_locker.h | 7 ++++ src/video_core/shader/control_flow.cpp | 46 ++++++++++++--------------- src/video_core/shader/control_flow.h | 6 +++- src/video_core/shader/decode.cpp | 2 +- src/video_core/shader/decode/texture.cpp | 38 ++++++++++------------ src/video_core/shader/expr.h | 4 +++ 7 files changed, 59 insertions(+), 52 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 592bbf657..fe467608e 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -22,6 +22,8 @@ ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) : stage{shader_stage}, engine{&engine} {} +ConstBufferLocker::~ConstBufferLocker() = default; + std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; const auto iter = keys.find(key); @@ -29,7 +31,7 @@ std::optional ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { return iter->second; } if (!engine) { - return {}; + return std::nullopt; } const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); keys.emplace(key, value); @@ -43,7 +45,7 @@ std::optional ConstBufferLocker::ObtainBoundSampler(u32 offse return iter->second; } if (!engine) { - return {}; + return std::nullopt; } const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); bound_samplers.emplace(key, value); @@ -58,7 +60,7 @@ std::optional ConstBufferLocker::ObtainBindle return iter->second; } if (!engine) { - return {}; + return std::nullopt; } const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); bindless_samplers.emplace(key, value); diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 966537fd6..600e2f3c3 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -16,6 +16,11 @@ using BoundSamplerMap = std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; +/** + * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader + * compiler. with it, the shader can obtain required data from GPU state and store it for disk + * shader compilation. + **/ class ConstBufferLocker { public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); @@ -23,6 +28,8 @@ public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); + ~ConstBufferLocker(); + /// Retrieves a key from the locker, if it's registered, it will give the registered value, if /// not it will obtain it from maxwell3d and register it. std::optional ObtainKey(u32 buffer, u32 offset); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 6c698bcff..d47c63d9f 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -41,14 +41,10 @@ BlockBranchInfo MakeBranchInfo(Args&&... args) { return std::make_shared(T(std::forward(args)...)); } -bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second) { - return false; //(*first) == (*second); -} - bool BlockBranchIsIgnored(BlockBranchInfo first) { bool ignore = false; if (std::holds_alternative(*first)) { - auto branch = std::get_if(first.get()); + const auto branch = std::get_if(first.get()); ignore = branch->ignore; } return ignore; @@ -151,10 +147,10 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& const Instruction instr = {state.program_code[pos]}; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() != OpCode::Id::BRX) { - return {}; + return std::nullopt; } if (instr.brx.constant_buffer != 0) { - return {}; + return std::nullopt; } track_register = instr.gpr8.Value(); result.relative_position = instr.brx.GetBranchExtend(); @@ -172,8 +168,8 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& if (opcode->get().GetId() == OpCode::Id::LD_C) { if (instr.gpr0.Value() == track_register && instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { - result.buffer = instr.cbuf36.index; - result.offset = instr.cbuf36.GetOffset(); + result.buffer = instr.cbuf36.index.Value(); + result.offset = static_cast(instr.cbuf36.GetOffset()); track_register = instr.gpr8.Value(); pos--; found_track = true; @@ -184,7 +180,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& } if (!found_track) { - return {}; + return std::nullopt; } found_track = false; @@ -194,7 +190,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& pos--; continue; } - const Instruction instr = {state.program_code[pos]}; + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { if (instr.gpr0.Value() == track_register) { @@ -208,7 +204,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& } if (!found_track) { - return {}; + return std::nullopt; } found_track = false; @@ -218,7 +214,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& pos--; continue; } - const Instruction instr = {state.program_code[pos]}; + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { if (instr.gpr0.Value() == track_register) { @@ -233,9 +229,9 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState& } if (!found_track) { - return {}; + return std::nullopt; } - return {result}; + return result; } std::pair ParseCode(CFGRebuildState& state, u32 address) { @@ -440,8 +436,8 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) branches.emplace_back(value, target); } parse_info.end_address = offset; - parse_info.branch_info = - MakeBranchInfo(static_cast(instr.gpr8.Value()), branches); + parse_info.branch_info = MakeBranchInfo( + static_cast(instr.gpr8.Value()), std::move(branches)); return {ParseResult::ControlCaught, parse_info}; } else { @@ -486,7 +482,7 @@ bool TryInspectAddress(CFGRebuildState& state) { current_block.end = address - 1; new_block.branch = current_block.branch; BlockBranchInfo forward_branch = MakeBranchInfo(); - auto branch = std::get_if(forward_branch.get()); + const auto branch = std::get_if(forward_branch.get()); branch->address = address; branch->ignore = true; current_block.branch = forward_branch; @@ -504,7 +500,7 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info.branch = parse_info.branch_info; if (std::holds_alternative(*block_info.branch)) { - auto branch = std::get_if(block_info.branch.get()); + const auto branch = std::get_if(block_info.branch.get()); if (branch->condition.IsUnconditional()) { return true; } @@ -550,7 +546,7 @@ bool TryQuery(CFGRebuildState& state) { gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); if (std::holds_alternative(*block.branch)) { - auto branch = std::get_if(block.branch.get()); + const auto branch = std::get_if(block.branch.get()); if (!branch->condition.IsUnconditional()) { q2.address = block.end + 1; state.queries.push_back(q2); @@ -573,8 +569,8 @@ bool TryQuery(CFGRebuildState& state) { state.queries.push_back(std::move(conditional_query)); return true; } - auto multi_branch = std::get_if(block.branch.get()); - for (auto& branch_case : multi_branch->branches) { + const auto multi_branch = std::get_if(block.branch.get()); + for (const auto& branch_case : multi_branch->branches) { Query conditional_query{q2}; conditional_query.address = branch_case.address; state.queries.push_back(std::move(conditional_query)); @@ -612,7 +608,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { return MakeExpr(true); }); if (std::holds_alternative(*branch_info)) { - auto branch = std::get_if(branch_info.get()); + const auto branch = std::get_if(branch_info.get()); if (branch->address < 0) { if (branch->kill) { mm.InsertReturn(get_expr(branch->condition), true); @@ -624,8 +620,8 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { mm.InsertGoto(get_expr(branch->condition), branch->address); return; } - auto multi_branch = std::get_if(branch_info.get()); - for (auto& branch_case : multi_branch->branches) { + const auto multi_branch = std::get_if(branch_info.get()); + for (const auto& branch_case : multi_branch->branches) { mm.InsertGoto(MakeExpr(multi_branch->gpr, branch_case.cmp_value), branch_case.address); } diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 288ee68af..5304998b9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -51,6 +51,10 @@ public: std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); } + bool operator!=(const SingleBranch& b) const { + return !operator==(b); + } + Condition condition{}; s32 address{exit_branch}; bool kill{}; @@ -67,7 +71,7 @@ struct CaseBranch { class MultiBranch { public: - MultiBranch(u32 gpr, std::vector& branches) + MultiBranch(u32 gpr, std::vector&& branches) : gpr{gpr}, branches{std::move(branches)} {} u32 gpr{}; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index e1afa4582..21fb9cb83 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -155,7 +155,7 @@ void ShaderIR::Decode() { [[fallthrough]]; case CompileDepth::BruteForce: { coverage_begin = main_offset; - const u32 shader_end = program_code.size(); + const std::size_t shader_end = program_code.size(); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index c369e23ad..f33e9c67c 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -284,7 +284,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, std::optional sampler_info) { - const auto offset = static_cast(sampler.index.Value()); + const auto offset = static_cast(sampler.index.Value()); Tegra::Shader::TextureType type; bool is_array; @@ -293,17 +293,14 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, type = sampler_info->type; is_array = sampler_info->is_array; is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; } else { - auto sampler = locker.ObtainBoundSampler(offset); - if (sampler) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { - type = Tegra::Shader::TextureType::Texture2D; - is_array = false; - is_shadow = false; - } + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; } // If this sampler has already been used, return the existing mapping. @@ -320,7 +317,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, const std::size_t next_index = used_samplers.size(); const Sampler entry{offset, next_index, type, is_array, is_shadow}; return *used_samplers.emplace(entry).first; -} +} // namespace VideoCommon::Shader const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, std::optional sampler_info) { @@ -336,17 +333,14 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, type = sampler_info->type; is_array = sampler_info->is_array; is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; } else { - auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); - if (sampler) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { - type = Tegra::Shader::TextureType::Texture2D; - is_array = false; - is_shadow = false; - } + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; } // If this sampler has already been used, return the existing mapping. diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index e41d23e93..4e8264367 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h @@ -127,6 +127,10 @@ public: return gpr == b.gpr && value == b.value; } + bool operator!=(const ExprGprEqual& b) const { + return !operator==(b); + } + u32 gpr; u32 value; }; -- cgit v1.2.3