author    ReinUsesLisp <reinuseslisp@airmail.cc>  2021-02-16 08:10:22 +0100
committer ameerj <52414509+ameerj@users.noreply.github.com>  2021-07-23 03:51:22 +0200
commit    b5d7279d878211654b4abb165d94af763a365f47 (patch)
tree      9b3a7b6e9d7d2b8945fe87d27ff75f1712ef06aa /src/shader_recompiler/ir_opt
parent    shader: Improve object pool (diff)
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r--  src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp              81
-rw-r--r--  src/shader_recompiler/ir_opt/constant_propagation_pass.cpp             76
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp  110
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h                                    4
4 files changed, 210 insertions, 61 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
new file mode 100644
index 000000000..f2326dea1
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -0,0 +1,81 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
+void AddConstantBufferDescriptor(Info& info, u32 index) {
+ auto& descriptor{info.constant_buffers.at(index)};
+ if (descriptor) {
+ return;
+ }
+ descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{
+ .index{index},
+ .count{1},
+ });
+}
+
+void Visit(Info& info, IR::Inst& inst) {
+ switch (inst.Opcode()) {
+ case IR::Opcode::WorkgroupId:
+ info.uses_workgroup_id = true;
+ break;
+ case IR::Opcode::LocalInvocationId:
+ info.uses_local_invocation_id = true;
+ break;
+ case IR::Opcode::FPAbs16:
+ case IR::Opcode::FPAdd16:
+ case IR::Opcode::FPCeil16:
+ case IR::Opcode::FPFloor16:
+ case IR::Opcode::FPFma16:
+ case IR::Opcode::FPMul16:
+ case IR::Opcode::FPNeg16:
+ case IR::Opcode::FPRoundEven16:
+ case IR::Opcode::FPSaturate16:
+ case IR::Opcode::FPTrunc16:
+ info.uses_fp16 = true;
+ break;
+ case IR::Opcode::FPAbs64:
+ case IR::Opcode::FPAdd64:
+ case IR::Opcode::FPCeil64:
+ case IR::Opcode::FPFloor64:
+ case IR::Opcode::FPFma64:
+ case IR::Opcode::FPMax64:
+ case IR::Opcode::FPMin64:
+ case IR::Opcode::FPMul64:
+ case IR::Opcode::FPNeg64:
+ case IR::Opcode::FPRecip64:
+ case IR::Opcode::FPRecipSqrt64:
+ case IR::Opcode::FPRoundEven64:
+ case IR::Opcode::FPSaturate64:
+ case IR::Opcode::FPTrunc64:
+ info.uses_fp64 = true;
+ break;
+ case IR::Opcode::GetCbuf:
+ if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) {
+ AddConstantBufferDescriptor(info, index.U32());
+ } else {
+ throw NotImplementedException("Constant buffer with non-immediate index");
+ }
+ break;
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void CollectShaderInfoPass(IR::Program& program) {
+ Info& info{program.info};
+ for (IR::Function& function : program.functions) {
+ for (IR::Block* const block : function.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ Visit(info, inst);
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization
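The new pass is a single forward walk that flips feature flags on Info as it encounters opcodes. A minimal standalone sketch of the same pattern, with Opcode, Inst, and Info as hypothetical stand-ins rather than the real IR types:

    // Sketch of the flag-collection pattern above; these types are
    // simplified stand-ins, not the real shader_recompiler IR.
    #include <vector>

    enum class Opcode { FPAdd16, FPAdd64, WorkgroupId, IAdd32 };

    struct Inst {
        Opcode opcode;
    };

    struct Info {
        bool uses_fp16{};
        bool uses_fp64{};
        bool uses_workgroup_id{};
    };

    void Visit(Info& info, const Inst& inst) {
        switch (inst.opcode) {
        case Opcode::FPAdd16:
            info.uses_fp16 = true;
            break;
        case Opcode::FPAdd64:
            info.uses_fp64 = true;
            break;
        case Opcode::WorkgroupId:
            info.uses_workgroup_id = true;
            break;
        default:
            break;
        }
    }

    Info Collect(const std::vector<Inst>& insts) {
        Info info;
        for (const Inst& inst : insts) {
            Visit(info, inst);
        }
        // A backend can then enable features (e.g. fp16) only when used.
        return info;
    }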
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index cbde65b9b..f1ad16d60 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -77,6 +77,16 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
return true;
}
+template <typename Func>
+bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
+ if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
+ return false;
+ }
+ using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
+ inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
+ return true;
+}
+
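FoldWhenAllImmediates leans on LambdaTraits to deduce the folding lambda's arity, then uses std::index_sequence to unpack the instruction's immediate arguments into a call. A self-contained sketch of that dispatch technique, where Value is a simplified stand-in for IR::Value and LambdaTraits is a hypothetical reimplementation:

    #include <cstddef>
    #include <cstdint>
    #include <type_traits>
    #include <utility>
    #include <vector>

    struct Value {
        std::uint32_t imm;
    };

    template <typename T>
    struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<T>::operator())> {};

    template <typename C, typename R, typename... Args>
    struct LambdaTraits<R (C::*)(Args...) const> {
        static constexpr std::size_t NUM_ARGS = sizeof...(Args);
    };

    template <typename Func, std::size_t... I>
    std::uint32_t EvalImmediates(const std::vector<Value>& args, Func&& func,
                                 std::index_sequence<I...>) {
        return func(args[I].imm...); // one immediate per lambda parameter
    }

    template <typename Func>
    std::uint32_t Fold(const std::vector<Value>& args, Func&& func) {
        using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
        return EvalImmediates(args, func, Indices{});
    }

    // Usage: Fold({{3}, {4}}, [](std::uint32_t a, std::uint32_t b) { return a + b; }) == 7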
void FoldGetRegister(IR::Inst& inst) {
if (inst.Arg(0).Reg() == IR::Reg::RZ) {
inst.ReplaceUsesWith(IR::Value{u32{0}});
@@ -103,6 +113,52 @@ void FoldAdd(IR::Inst& inst) {
}
}
+void FoldISub32(IR::Inst& inst) {
+ if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
+ return;
+ }
+ if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
+ return;
+ }
+ // ISub32 is generally used to subtract two constant buffer reads; compare them and
+ // replace the subtraction with zero when they are equal.
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
+ return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf &&
+ a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
+ }};
+ IR::Inst* op_a{inst.Arg(0).InstRecursive()};
+ IR::Inst* op_b{inst.Arg(1).InstRecursive()};
+ if (equal_cbuf(op_a, op_b)) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ return;
+ }
+ // It's also possible a value is being added to a cbuf and then subtracted
+ if (op_b->Opcode() == IR::Opcode::IAdd32) {
+ // Canonicalize local variables to simplify the following logic
+ std::swap(op_a, op_b);
+ }
+ if (op_b->Opcode() != IR::Opcode::GetCbuf) {
+ return;
+ }
+ IR::Inst* const inst_cbuf{op_b};
+ if (op_a->Opcode() != IR::Opcode::IAdd32) {
+ return;
+ }
+ IR::Value add_op_a{op_a->Arg(0)};
+ IR::Value add_op_b{op_a->Arg(1)};
+ if (add_op_b.IsImmediate()) {
+ // Canonicalize
+ std::swap(add_op_a, add_op_b);
+ }
+ if (add_op_b.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
+ if (equal_cbuf(add_cbuf, inst_cbuf)) {
+ inst.ReplaceUsesWith(add_op_a);
+ }
+}
+
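The identity this fold exploits is that unsigned 32-bit arithmetic wraps, so (x + c) - c recovers x exactly even when the addition overflows; a short demonstration:

    // The identity behind FoldISub32's (add_op + cbuf) - cbuf rewrite.
    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint32_t c = 0xfffffff0u; // e.g. the same cbuf value on both sides
        const std::uint32_t x = 0x123u;
        const std::uint32_t sum = x + c;     // wraps past 2^32
        assert(sum - c == x);                // still exactly recoverable
        return 0;
    }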
template <typename T>
void FoldSelect(IR::Inst& inst) {
const IR::Value cond{inst.Arg(0)};
@@ -170,15 +226,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<
return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)};
}
-template <typename Func>
-void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
- if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
- return;
- }
- using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
- inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
-}
-
void FoldBranchConditional(IR::Inst& inst) {
const IR::U1 cond{inst.Arg(0)};
if (cond.IsImmediate()) {
@@ -205,6 +252,8 @@ void ConstantPropagation(IR::Inst& inst) {
return FoldGetPred(inst);
case IR::Opcode::IAdd32:
return FoldAdd<u32>(inst);
+ case IR::Opcode::ISub32:
+ return FoldISub32(inst);
case IR::Opcode::BitCastF32U32:
return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32);
case IR::Opcode::BitCastU32F32:
@@ -220,17 +269,20 @@ void ConstantPropagation(IR::Inst& inst) {
case IR::Opcode::LogicalNot:
return FoldLogicalNot(inst);
case IR::Opcode::SLessThan:
- return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
+ return;
case IR::Opcode::ULessThan:
- return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
+ return;
case IR::Opcode::BitFieldUExtract:
- return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
+ FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) {
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
base, shift, count);
}
return (base >> shift) & ((1U << count) - 1);
});
+ return;
case IR::Opcode::BranchConditional:
return FoldBranchConditional(inst);
default:
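As a worked example of the BitFieldUExtract fold, (base >> shift) & ((1U << count) - 1) extracts count bits starting at bit shift. This hypothetical helper special-cases count == 32, which the guard above allows but which 1U << 32 cannot express without undefined behavior:

    #include <cassert>
    #include <cstdint>

    std::uint32_t ExtractBits(std::uint32_t base, std::uint32_t shift, std::uint32_t count) {
        // Same precondition as the pass: shift + count must not exceed 32.
        const std::uint32_t mask = count == 32 ? 0xffffffffu : (1U << count) - 1;
        return (base >> shift) & mask;
    }

    int main() {
        assert(ExtractBits(0xdeadbeefu, 8, 8) == 0xbeu);  // byte 1
        assert(ExtractBits(0xdeadbeefu, 28, 4) == 0xdu);  // top nibble
        return 0;
    }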
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index b40c0c57b..bf230a850 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -28,7 +28,8 @@ struct StorageBufferAddr {
/// Block iterator to a global memory instruction and the storage buffer it uses
struct StorageInst {
StorageBufferAddr storage_buffer;
- IR::Block::iterator inst;
+ IR::Inst* inst;
+ IR::Block* block;
};
/// Bias towards a certain range of constant buffers when looking for storage buffers
@@ -41,7 +42,7 @@ struct Bias {
using StorageBufferSet =
boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
boost::container::small_vector<StorageBufferAddr, 16>>;
-using StorageInstVector = boost::container::small_vector<StorageInst, 32>;
+using StorageInstVector = boost::container::small_vector<StorageInst, 24>;
/// Returns true when the instruction is a global memory instruction
bool IsGlobalMemory(const IR::Inst& inst) {
@@ -109,23 +110,22 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
}
/// Discards a global memory operation; reads return zero and writes are ignored
-void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
+void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const IR::Value zero{u32{0}};
- switch (inst->Opcode()) {
+ switch (inst.Opcode()) {
case IR::Opcode::LoadGlobalS8:
case IR::Opcode::LoadGlobalU8:
case IR::Opcode::LoadGlobalS16:
case IR::Opcode::LoadGlobalU16:
case IR::Opcode::LoadGlobal32:
- inst->ReplaceUsesWith(zero);
+ inst.ReplaceUsesWith(zero);
break;
case IR::Opcode::LoadGlobal64:
- inst->ReplaceUsesWith(IR::Value{
- &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})});
+ inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)});
break;
case IR::Opcode::LoadGlobal128:
- inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
- inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})});
+ inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)});
break;
case IR::Opcode::WriteGlobalS8:
case IR::Opcode::WriteGlobalU8:
@@ -134,11 +134,10 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
case IR::Opcode::WriteGlobal32:
case IR::Opcode::WriteGlobal64:
case IR::Opcode::WriteGlobal128:
- inst->Invalidate();
+ inst.Invalidate();
break;
default:
- throw LogicError("Invalid opcode to discard its global memory operation {}",
- inst->Opcode());
+ throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode());
}
}
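The switch from IR::Block::iterator to IR::Inst& works because the instruction list is a Boost.Intrusive list: the hook lives inside the element, so s_iterator_to can recover an iterator from a plain reference in O(1), and passes can store Inst* safely. A minimal sketch of that facility:

    #include <boost/intrusive/list.hpp>
    #include <cassert>
    #include <iterator>

    struct Node : boost::intrusive::list_base_hook<> {
        int value{};
    };

    using List = boost::intrusive::list<Node>;

    int main() {
        Node a, b;
        a.value = 1;
        b.value = 2;
        List list;
        list.push_back(a);
        list.push_back(b);
        // O(1): the hook is embedded in the element itself, no search needed.
        List::iterator it = List::s_iterator_to(b);
        assert(it->value == 2);
        assert(std::next(list.begin()) == it);
        list.clear(); // intrusive containers do not own their elements
        return 0;
    }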
@@ -232,8 +231,8 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
}
/// Collects the storage buffer used by a global memory instruction and the instruction itself
-void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
- StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) {
+void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set,
+ StorageInstVector& to_replace) {
// NVN puts storage buffers in a specific range; we have to bias towards these
// addresses to avoid false positives
static constexpr Bias nvn_bias{
@@ -241,19 +240,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
.offset_begin{0x110},
.offset_end{0x610},
};
- // First try to find storage buffers in the NVN address
- const IR::U64 addr{inst->Arg(0)};
- if (addr.IsImmediate()) {
- // Immediate addresses can't be lowered to a storage buffer
- DiscardGlobalMemory(block, inst);
- return;
- }
// Track the low address of the instruction
- const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(addr.InstRecursive())};
+ const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
if (!low_addr_info) {
DiscardGlobalMemory(block, inst);
return;
}
+ // First try to find storage buffers in the NVN address
const IR::U32 low_addr{low_addr_info->value};
std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
if (!storage_buffer) {
@@ -269,21 +262,22 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
storage_buffer_set.insert(*storage_buffer);
to_replace.push_back(StorageInst{
.storage_buffer{*storage_buffer},
- .inst{inst},
+ .inst{&inst},
+ .block{&block},
});
}
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) {
- IR::IREmitter ir{block, inst};
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::U32 offset;
- if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&*inst)}) {
+ if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
offset = low_addr->value;
if (low_addr->imm_offset != 0) {
offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
}
} else {
- offset = ir.ConvertU(32, IR::U64{inst->Arg(0)});
+ offset = ir.ConvertU(32, IR::U64{inst.Arg(0)});
}
// Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes.
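A worked example of that subtraction, with made-up addresses: the low 32 bits of the 64-bit guest pointer, minus the buffer base read from the constant buffer, give the byte offset into the storage buffer:

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint64_t guest_addr = 0x0000'0004'8000'0120ull; // hypothetical address
        const std::uint32_t buffer_base = 0x8000'0000u;            // hypothetical cbuf base
        const std::uint32_t byte_offset = static_cast<std::uint32_t>(guest_addr) - buffer_base;
        assert(byte_offset == 0x120u);
        return 0;
    }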
@@ -292,25 +286,27 @@ IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferA
}
/// Replace a global memory load instruction with its storage buffer equivalent
-void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
const IR::U32& offset) {
- const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
- const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})};
- inst->ReplaceUsesWith(value);
+ const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
+ inst.ReplaceUsesWith(value);
}
/// Replace a global memory write instruction with its storage buffer equivalent
-void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
const IR::U32& offset) {
- const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
- block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)});
- inst->Invalidate();
+ const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
+ inst.Invalidate();
}
/// Replace a global memory instruction with its storage buffer equivalent
-void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
const IR::U32& offset) {
- switch (inst->Opcode()) {
+ switch (inst.Opcode()) {
case IR::Opcode::LoadGlobalS8:
case IR::Opcode::LoadGlobalU8:
case IR::Opcode::LoadGlobalS16:
@@ -328,26 +324,44 @@ void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_
case IR::Opcode::WriteGlobal128:
return ReplaceWrite(block, inst, storage_index, offset);
default:
- throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode());
+ throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode());
}
}
} // Anonymous namespace
-void GlobalMemoryToStorageBufferPass(IR::Block& block) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program) {
StorageBufferSet storage_buffers;
StorageInstVector to_replace;
- for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) {
- if (!IsGlobalMemory(*inst)) {
- continue;
+ for (IR::Function& function : program.functions) {
+ for (IR::Block* const block : function.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (!IsGlobalMemory(inst)) {
+ continue;
+ }
+ CollectStorageBuffers(*block, inst, storage_buffers, to_replace);
+ }
}
- CollectStorageBuffers(block, inst, storage_buffers, to_replace);
}
- for (const auto [storage_buffer, inst] : to_replace) {
- const auto it{storage_buffers.find(storage_buffer)};
- const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
- const IR::U32 offset{StorageOffset(block, inst, storage_buffer)};
- Replace(block, inst, storage_index, offset);
+ Info& info{program.info};
+ u32 storage_index{};
+ for (const StorageBufferAddr& storage_buffer : storage_buffers) {
+ info.storage_buffers_descriptors.push_back({
+ .cbuf_index{storage_buffer.index},
+ .cbuf_offset{storage_buffer.offset},
+ .count{1},
+ });
+ info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back();
+ ++storage_index;
+ }
+ for (const StorageInst& storage_inst : to_replace) {
+ const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
+ const auto it{storage_buffers.find(storage_inst.storage_buffer)};
+ const IR::U32 index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+ IR::Block* const block{storage_inst.block};
+ IR::Inst* const inst{storage_inst.inst};
+ const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+ Replace(*block, *inst, index, offset);
}
}
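The rewritten pass deduplicates storage buffers in a flat_set, then refers to each buffer by the ordinal index_of reports, which becomes its descriptor slot. A small sketch of that collect-then-index pattern, using a simplified BufferAddr key:

    #include <boost/container/flat_set.hpp>
    #include <cassert>
    #include <compare>
    #include <cstdint>

    struct BufferAddr {
        std::uint32_t index;
        std::uint32_t offset;
        auto operator<=>(const BufferAddr&) const = default;
    };

    int main() {
        boost::container::flat_set<BufferAddr> buffers;
        buffers.insert({.index = 0, .offset = 0x110}); // first unique buffer
        buffers.insert({.index = 0, .offset = 0x130}); // second unique buffer
        buffers.insert({.index = 0, .offset = 0x110}); // duplicate: ignored
        const auto it = buffers.find(BufferAddr{.index = 0, .offset = 0x130});
        assert(buffers.index_of(it) == 1); // stable ordinal, used as the bind slot
        return 0;
    }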
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 30eb31588..89e5811d3 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -8,6 +8,7 @@
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/function.h"
+#include "shader_recompiler/frontend/ir/program.h"
namespace Shader::Optimization {
@@ -18,9 +19,10 @@ void PostOrderInvoke(Func&& func, IR::Function& function) {
}
}
+void CollectShaderInfoPass(IR::Program& program);
void ConstantPropagationPass(IR::Block& block);
void DeadCodeEliminationPass(IR::Block& block);
-void GlobalMemoryToStorageBufferPass(IR::Block& block);
+void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Function& function);
void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
void VerificationPass(const IR::Function& function);
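With CollectShaderInfoPass and GlobalMemoryToStorageBufferPass now taking a whole IR::Program, the pipeline mixes two granularities. A hypothetical sketch of how a driver loop might combine program-wide passes with the PostOrderInvoke helper above; Program, Function, and Block are simplified stand-ins and the pass bodies are placeholders:

    #include <vector>

    struct Block {};
    struct Function {
        std::vector<Block*> post_order_blocks;
    };
    struct Program {
        std::vector<Function> functions;
    };

    template <typename Func>
    void PostOrderInvoke(Func&& func, Function& function) {
        for (Block* const block : function.post_order_blocks) {
            func(*block);
        }
    }

    void Optimize(Program& program) {
        // Program-wide passes see every function at once...
        const auto collect_shader_info = [](Program&) { /* gather Info flags */ };
        const auto global_memory = [](Program&) { /* lower global memory */ };
        collect_shader_info(program);
        global_memory(program);
        // ...while block-local passes still go through PostOrderInvoke.
        const auto constant_propagation = [](Block&) { /* fold immediates */ };
        for (Function& function : program.functions) {
            PostOrderInvoke(constant_propagation, function);
        }
    }

    int main() {
        Program program;
        Optimize(program);
        return 0;
    }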