From b5d7279d878211654b4abb165d94af763a365f47 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 04:10:22 -0300 Subject: spirv: Initial bindings support --- .../ir_opt/collect_shader_info_pass.cpp | 81 +++++++++++++++ .../ir_opt/constant_propagation_pass.cpp | 76 +++++++++++--- .../global_memory_to_storage_buffer_pass.cpp | 110 ++++++++++++--------- src/shader_recompiler/ir_opt/passes.h | 4 +- 4 files changed, 210 insertions(+), 61 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp (limited to 'src/shader_recompiler/ir_opt') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp new file mode 100644 index 000000000..f2326dea1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -0,0 +1,81 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +void AddConstantBufferDescriptor(Info& info, u32 index) { + auto& descriptor{info.constant_buffers.at(index)}; + if (descriptor) { + return; + } + descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + .index{index}, + .count{1}, + }); +} + +void Visit(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::WorkgroupId: + info.uses_workgroup_id = true; + break; + case IR::Opcode::LocalInvocationId: + info.uses_local_invocation_id = true; + break; + case IR::Opcode::FPAbs16: + case IR::Opcode::FPAdd16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPNeg16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPSaturate16: + case IR::Opcode::FPTrunc16: + info.uses_fp16 = true; + break; + case IR::Opcode::FPAbs64: 
+ case IR::Opcode::FPAdd64: + case IR::Opcode::FPCeil64: + case IR::Opcode::FPFloor64: + case IR::Opcode::FPFma64: + case IR::Opcode::FPMax64: + case IR::Opcode::FPMin64: + case IR::Opcode::FPMul64: + case IR::Opcode::FPNeg64: + case IR::Opcode::FPRecip64: + case IR::Opcode::FPRecipSqrt64: + case IR::Opcode::FPRoundEven64: + case IR::Opcode::FPSaturate64: + case IR::Opcode::FPTrunc64: + info.uses_fp64 = true; + break; + case IR::Opcode::GetCbuf: + if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { + AddConstantBufferDescriptor(info, index.U32()); + } else { + throw NotImplementedException("Constant buffer with non-immediate index"); + } + break; + default: + break; + } +} +} // Anonymous namespace + +void CollectShaderInfoPass(IR::Program& program) { + Info& info{program.info}; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index cbde65b9b..f1ad16d60 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -77,6 +77,16 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { return true; } +template +bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return false; + } + using Indices = std::make_index_sequence::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + return true; +} + void FoldGetRegister(IR::Inst& inst) { if (inst.Arg(0).Reg() == IR::Reg::RZ) { inst.ReplaceUsesWith(IR::Value{u32{0}}); @@ -103,6 +113,52 @@ void FoldAdd(IR::Inst& inst) { } } +void FoldISub32(IR::Inst& inst) { + if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { 
return a - b; })) { + return; + } + if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { + return; + } + // ISub32 is generally used to subtract two constant buffers, compare and replace this with + // zero if they equal. + const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { + return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && + a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + }}; + IR::Inst* op_a{inst.Arg(0).InstRecursive()}; + IR::Inst* op_b{inst.Arg(1).InstRecursive()}; + if (equal_cbuf(op_a, op_b)) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + return; + } + // It's also possible a value is being added to a cbuf and then subtracted + if (op_b->Opcode() == IR::Opcode::IAdd32) { + // Canonicalize local variables to simplify the following logic + std::swap(op_a, op_b); + } + if (op_b->Opcode() != IR::Opcode::GetCbuf) { + return; + } + IR::Inst* const inst_cbuf{op_b}; + if (op_a->Opcode() != IR::Opcode::IAdd32) { + return; + } + IR::Value add_op_a{op_a->Arg(0)}; + IR::Value add_op_b{op_a->Arg(1)}; + if (add_op_b.IsImmediate()) { + // Canonicalize + std::swap(add_op_a, add_op_b); + } + if (add_op_b.IsImmediate()) { + return; + } + IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; + if (equal_cbuf(add_cbuf, inst_cbuf)) { + inst.ReplaceUsesWith(add_op_a); + } +} + template void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; @@ -170,15 +226,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence< return IR::Value{func(Arg>(inst.Arg(I))...)}; } -template -void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - using Indices = std::make_index_sequence::NUM_ARGS>; - inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); -} - void FoldBranchConditional(IR::Inst& inst) { const IR::U1 cond{inst.Arg(0)}; if (cond.IsImmediate()) { @@ -205,6 +252,8 @@ void ConstantPropagation(IR::Inst& 
inst) { return FoldGetPred(inst); case IR::Opcode::IAdd32: return FoldAdd(inst); + case IR::Opcode::ISub32: + return FoldISub32(inst); case IR::Opcode::BitCastF32U32: return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: @@ -220,17 +269,20 @@ void ConstantPropagation(IR::Inst& inst) { case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); case IR::Opcode::SLessThan: - return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + return; case IR::Opcode::ULessThan: - return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + return; case IR::Opcode::BitFieldUExtract: - return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > Common::BitSize()) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, base, shift, count); } return (base >> shift) & ((1U << count) - 1); }); + return; case IR::Opcode::BranchConditional: return FoldBranchConditional(inst); default: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index b40c0c57b..bf230a850 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -28,7 +28,8 @@ struct StorageBufferAddr { /// Block iterator to a global memory instruction and the storage buffer it uses struct StorageInst { StorageBufferAddr storage_buffer; - IR::Block::iterator inst; + IR::Inst* inst; + IR::Block* block; }; /// Bias towards a certain range of constant buffers when looking for storage buffers @@ -41,7 +42,7 @@ struct Bias { using StorageBufferSet = boost::container::flat_set, 
boost::container::small_vector>; -using StorageInstVector = boost::container::small_vector; +using StorageInstVector = boost::container::small_vector; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -109,23 +110,22 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce } /// Discards a global memory operation, reads return zero and writes are ignored -void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { +void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: case IR::Opcode::LoadGlobalU16: case IR::Opcode::LoadGlobal32: - inst->ReplaceUsesWith(zero); + inst.ReplaceUsesWith(zero); break; case IR::Opcode::LoadGlobal64: - inst->ReplaceUsesWith(IR::Value{ - &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); break; case IR::Opcode::LoadGlobal128: - inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( - inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); break; case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: @@ -134,11 +134,10 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: - inst->Invalidate(); + inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", - inst->Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); } } @@ -232,8 +231,8 @@ std::optional 
Track(const IR::Value& value, const Bias* bias) } /// Collects the storage buffer used by a global memory instruction and the instruction itself -void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, - StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, + StorageInstVector& to_replace) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -241,19 +240,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, .offset_begin{0x110}, .offset_end{0x610}, }; - // First try to find storage buffers in the NVN address - const IR::U64 addr{inst->Arg(0)}; - if (addr.IsImmediate()) { - // Immediate addresses can't be lowered to a storage buffer - DiscardGlobalMemory(block, inst); - return; - } // Track the low address of the instruction - const std::optional low_addr_info{TrackLowAddress(addr.InstRecursive())}; + const std::optional low_addr_info{TrackLowAddress(&inst)}; if (!low_addr_info) { DiscardGlobalMemory(block, inst); return; } + // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { @@ -269,21 +262,22 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{inst}, + .inst{&inst}, + .block{&block}, }); } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { - IR::IREmitter ir{block, inst}; +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { + IR::IREmitter ir{block, 
IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; - if (const std::optional low_addr{TrackLowAddress(&*inst)}) { + if (const std::optional low_addr{TrackLowAddress(&inst)}) { offset = low_addr->value; if (low_addr->imm_offset != 0) { offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); } } else { - offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); + offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. @@ -292,25 +286,27 @@ IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferA } /// Replace a global memory load instruction with its storage buffer equivalent -void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})}; - inst->ReplaceUsesWith(value); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; + inst.ReplaceUsesWith(value); } /// Replace a global memory write instruction with its storage buffer equivalent -void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)}); - inst->Invalidate(); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + block.PrependNewInst(it, new_opcode, {storage_index, 
offset, inst.Arg(1)}); + inst.Invalidate(); } /// Replace a global memory instruction with its storage buffer equivalent -void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -328,26 +324,44 @@ void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_ case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); } } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Block& block) { +void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) { - if (!IsGlobalMemory(*inst)) { - continue; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; + } + CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + } } - CollectStorageBuffers(block, inst, storage_buffers, to_replace); } - for (const auto [storage_buffer, inst] : to_replace) { - const auto it{storage_buffers.find(storage_buffer)}; - const IR::U32 storage_index{IR::Value{static_cast(storage_buffers.index_of(it))}}; - const IR::U32 offset{StorageOffset(block, inst, storage_buffer)}; - Replace(block, inst, storage_index, offset); + Info& info{program.info}; + u32 storage_index{}; + for (const StorageBufferAddr& storage_buffer : storage_buffers) { + info.storage_buffers_descriptors.push_back({ + 
.cbuf_index{storage_buffer.index}, + .cbuf_offset{storage_buffer.offset}, + .count{1}, + }); + info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); + ++storage_index; + } + for (const StorageInst& storage_inst : to_replace) { + const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; + const auto it{storage_buffers.find(storage_inst.storage_buffer)}; + const IR::U32 index{IR::Value{static_cast(storage_buffers.index_of(it))}}; + IR::Block* const block{storage_inst.block}; + IR::Inst* const inst{storage_inst.inst}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 30eb31588..89e5811d3 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,6 +8,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" namespace Shader::Optimization { @@ -18,9 +19,10 @@ void PostOrderInvoke(Func&& func, IR::Function& function) { } } +void CollectShaderInfoPass(IR::Program& program); void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); -void GlobalMemoryToStorageBufferPass(IR::Block& block); +void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); -- cgit v1.2.3