diff options
-rw-r--r-- | src/shader_recompiler/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp | 1 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/passes.h | 1 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp | 79 |
4 files changed, 82 insertions, 0 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 83b763447..19db17c6d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC ir_opt/rescaling_pass.cpp ir_opt/ssa_rewrite_pass.cpp ir_opt/texture_pass.cpp + ir_opt/vendor_workaround_pass.cpp ir_opt/verification_pass.cpp object_pool.h precompiled_headers.h diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 928b35561..8fac6bad3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo } Optimization::CollectShaderInfoPass(env, program); Optimization::LayerPass(program, host_info); + Optimization::VendorWorkaroundPass(program); CollectInterpolationInfo(env, program); AddNVNStorageBuffers(program); diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 629d18fa1..d4d5285e5 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program); void PositionPass(Environment& env, IR::Program& program); void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); void LayerPass(IR::Program& program, const HostTranslateInfo& host_info); +void VendorWorkaroundPass(IR::Program& program); void VerificationPass(const IR::Program& program); // Dual Vertex diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp new file mode 100644 index 000000000..08c658cb8 --- /dev/null +++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp @@ -0,0 +1,79 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +namespace { +void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) { + /* + * Workaround for an NVIDIA bug seen in Super Mario RPG + * + * We are looking for this pattern: + * %lhs_bfe = BitFieldUExtract %factor_a, #0, #16 + * %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional? + * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16 + * %rhs_bfe = BitFieldUExtract %factor_a, #16, #16 + * %result = IAdd32 %lhs_shl, %rhs_bfe + * + * And replacing the IAdd32 with a BitwiseOr32 + * %result = BitwiseOr32 %lhs_shl, %rhs_bfe + * + */ + IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()}; + IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()}; + if (!lhs_shl || !rhs_bfe) { + return; + } + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { + return; + } + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} || + rhs_bfe->Arg(2) != IR::Value{16U}) { + return; + } + IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()}; + if (!lhs_mul) { + return; + } + const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract}; + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 && + lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return; + } + IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()}; + if (!lhs_bfe) { + return; + } + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return; + } + if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) { + return; + } + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)})); +} + +} // Anonymous namespace + +void VendorWorkaroundPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::IAdd32: + AddingByteSwapsWorkaround(*block, inst); + break; + default: + break; + } + } + } +} + +} // namespace Shader::Optimization |