diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 47 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.h | 30 | ||||
-rw-r--r-- | src/video_core/shader/decode.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic.cpp | 13 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_half_immediate.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 30 | ||||
-rw-r--r-- | src/video_core/shader/decode/ffma.cpp | 10 | ||||
-rw-r--r-- | src/video_core/shader/decode/half_set_predicate.cpp | 71 | ||||
-rw-r--r-- | src/video_core/shader/decode/hfma2.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 28 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 4 |
12 files changed, 160 insertions, 91 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index fdcc970ff..ec3a76690 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -15,7 +15,7 @@ #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { - +namespace { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -29,8 +29,7 @@ struct Query { struct BlockStack { BlockStack() = default; - BlockStack(const BlockStack& b) = default; - BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} std::stack<u32> ssy_stack{}; std::stack<u32> pbk_stack{}; }; @@ -58,7 +57,7 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, const u32 start) - : program_code{program_code}, program_size{program_size}, start{start} {} + : start{start}, program_code{program_code}, program_size{program_size} {} u32 start{}; std::vector<BlockInfo> block_info{}; @@ -85,7 +84,7 @@ std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) return {BlockCollision::Inside, index}; } } - return {BlockCollision::None, -1}; + return {BlockCollision::None, 0xFFFFFFFF}; } struct ParseInfo { @@ -365,27 +364,29 @@ bool TryQuery(CFGRebuildState& state) { const auto gather_end = labels.upper_bound(block.end); while (gather_start != gather_end) { cc.push(gather_start->second); - gather_start++; + ++gather_start; } }; if (state.queries.empty()) { return false; } + Query& q = state.queries.front(); const u32 block_index = state.registered[q.address]; BlockInfo& block = state.block_info[block_index]; - // If the block is visted, check if the stacks match, else gather the ssy/pbk + // If the block is visited, check if the stacks match, else gather the ssy/pbk // labels into the current stack and look if the branch at the end of the block // consumes a label. Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); + const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && + (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); state.queries.pop_front(); return all_okay; } block.visited = true; - state.stacks[q.address] = BlockStack{q}; + state.stacks.insert_or_assign(q.address, BlockStack{q}); + Query q2(q); state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); @@ -394,6 +395,7 @@ bool TryQuery(CFGRebuildState& state) { q2.address = block.end + 1; state.queries.push_back(q2); } + Query conditional_query{q2}; if (block.branch.is_sync) { if (block.branch.address == unassigned_branch) { @@ -408,13 +410,15 @@ bool TryQuery(CFGRebuildState& state) { conditional_query.pbk_stack.pop(); } conditional_query.address = block.branch.address; - state.queries.push_back(conditional_query); + state.queries.push_back(std::move(conditional_query)); return true; } +} // Anonymous namespace -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address) { +std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address) { CFGRebuildState state{program_code, program_size, start_address}; + // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); @@ -424,10 +428,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u return {}; } } + // Decompile Stacks - Query start_query{}; - start_query.address = state.start; - state.queries.push_back(start_query); + state.queries.push_back(Query{state.start, {}, {}}); bool decompiled = true; while (!state.queries.empty()) { if (!TryQuery(state)) { @@ -435,14 +438,15 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u break; } } + // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); ShaderCharacteristics result_out{}; result_out.decompilable = decompiled; result_out.start = start_address; result_out.end = start_address; - for (auto& block : state.block_info) { + for (const auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; @@ -457,8 +461,9 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u } if (result_out.decompilable) { result_out.labels = std::move(state.labels); - return {result_out}; + return {std::move(result_out)}; } + // If it's not decompilable, merge the unlabelled blocks together auto back = result_out.blocks.begin(); auto next = std::next(back); @@ -469,8 +474,8 @@ std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u continue; } back = next; - next++; + ++next; } - return {result_out}; + return {std::move(result_out)}; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5e8ea3271..b0a5e4f8c 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -4,7 +4,6 @@ #pragma once -#include <cstring> #include <list> #include <optional> #include <unordered_set> @@ -26,27 +25,44 @@ struct Condition { bool IsUnconditional() const { return predicate == Pred::UnusedIndex && cc == ConditionCode::T; } + bool operator==(const Condition& other) const { return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); } + + bool operator!=(const Condition& other) const { + return !operator==(other); + } }; struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; struct Branch { Condition cond{}; bool kills{}; s32 address{}; + bool operator==(const Branch& b) const { return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); } - } branch{}; + + bool operator!=(const Branch& b) const { + return !operator==(b); + } + }; + + u32 start{}; + u32 end{}; + bool ignore_branch{}; + Branch branch{}; + bool operator==(const ShaderBlock& sb) const { return std::tie(start, end, ignore_branch, branch) == std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); } + + bool operator!=(const ShaderBlock& sb) const { + return !operator==(sb); + } }; struct ShaderCharacteristics { @@ -57,7 +73,7 @@ struct ShaderCharacteristics { std::unordered_set<u32> labels{}; }; -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address); +std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index afffd157f..b547d8323 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -47,14 +47,14 @@ void ShaderIR::Decode() { if (shader_info.decompilable) { disable_flow_stack = true; const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == exit_branch) { + if (label == static_cast<u32>(exit_branch)) { return; } basic_blocks.insert({label, nodes}); }; const auto& blocks = shader_info.blocks; NodeBlock current_block; - u32 current_label = exit_branch; + u32 current_label = static_cast<u32>(exit_branch); for (auto& block : blocks) { if (shader_info.labels.count(block.start) != 0) { insert_block(current_block, current_label); diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa..1473c282a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - UNIMPLEMENTED_IF_MSG( - instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default + if (instr.fmul.tab5cb8_2 != 0) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + } + if (instr.fmul.tab5c68_0 != 1) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); + } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f23..6466fc011 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } } else { - UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..8973fbefa 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); UNIMPLEMENTED_IF(instr.conversion.selector); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); @@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2F_R: case OpCode::Id::F2F_C: case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2F is not implemented"); @@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + // TODO: figure where extract is sey in the encoding + value = Operation(OperationCode::FCastHalf0, PRECISE, value); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { @@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", static_cast<u32>(instr.conversion.f2f.rounding.Value())); - return Immediate(0); + return value; } }(); value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); + + if (instr.conversion.dst_size == Register::Size::Short) { + value = Operation(OperationCode::HCastFloat, PRECISE, value); + } + SetRegister(bb, instr.gpr0, value); break; } case OpCode::Id::F2I_R: case OpCode::Id::F2I_C: case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2I is not implemented"); Node value = [&]() { @@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { } }(); + if (instr.conversion.src_size == Register::Size::Short) { + // TODO: figure where extract is sey in the encoding + value = Operation(OperationCode::FCastHalf0, PRECISE, value); + } + value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca3..ca2f39e8d 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", - instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", - instr.ffma.tab5980_1.Value()); + if (instr.ffma.tab5980_0 != 1) { + LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + } + if (instr.ffma.tab5980_1 != 0) { + LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + } const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8..afea33e5f 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -18,43 +18,56 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); + DEBUG_ASSERT(instr.hsetp2.ftz == 0); Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - Node op_b = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_R: - return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, - instr.hsetp2.negate_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); - - // We can't use the constant predicate as destination. - ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); + Tegra::Shader::PredCondition cond{}; + bool h_and{}; + Node op_b{}; + switch (opcode->get().GetId()) { + case OpCode::Id::HSETP2_C: + cond = instr.hsetp2.cbuf_and_imm.cond; + h_and = instr.hsetp2.cbuf_and_imm.h_and; + op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), + instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); + break; + case OpCode::Id::HSETP2_IMM: + cond = instr.hsetp2.cbuf_and_imm.cond; + h_and = instr.hsetp2.cbuf_and_imm.h_and; + op_b = UnpackHalfImmediate(instr, true); + break; + case OpCode::Id::HSETP2_R: + cond = instr.hsetp2.reg.cond; + h_and = instr.hsetp2.reg.h_and; + op_b = + UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, + instr.hsetp2.reg.negate_b), + instr.hsetp2.reg.type_b); + break; + default: + UNREACHABLE(); + op_b = Immediate(0); + } const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const OperationCode pair_combiner = - instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; - - const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); - const Node first_pred = Operation(pair_combiner, comparison); + const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred); - // Set the primary predicate to the result of Predicate OP SecondPredicate - const Node value = Operation(combiner, first_pred, second_pred); - SetPredicate(bb, instr.hsetp2.pred3, value); + const auto Write = [&](u64 dest, Node src) { + SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); + }; - if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); + const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); + const u64 first = instr.hsetp2.pred0; + const u64 second = instr.hsetp2.pred39; + if (h_and) { + const Node joined = Operation(OperationCode::LogicalAnd2, comparison); + Write(first, joined); + Write(second, Operation(OperationCode::LogicalNegate, joined)); + } else { + Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); + Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); } return pc; diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9..5b44cb79c 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); } else { - UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); } constexpr auto identity = HalfType::H0_H1; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index c0f64d7a0..ac0e764d6 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); switch (opcode->get().GetId()) { + case OpCode::Id::NOP: { + UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); + UNIMPLEMENTED_IF(instr.nop.trigger != 0); + // With the previous preconditions, this instruction is a no-operation. + break; + } case OpCode::Id::EXIT: { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 7427ed896..5f0852364 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -30,6 +30,8 @@ enum class OperationCode { FNegate, /// (MetaArithmetic, float a) -> float FAbsolute, /// (MetaArithmetic, float a) -> float FClamp, /// (MetaArithmetic, float value, float min, float max) -> float + FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float + FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float FMin, /// (MetaArithmetic, float a, float b) -> float FMax, /// (MetaArithmetic, float a, float b) -> float FCos, /// (MetaArithmetic, float a) -> float @@ -83,17 +85,18 @@ enum class OperationCode { UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAbsolute, /// (f16vec2 a) -> f16vec2 + HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 + HMergeF32, /// (f16vec2 src) -> float + HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HPack2, /// (float a, float b) -> f16vec2 LogicalAssign, /// (bool& dst, bool src) -> void LogicalAnd, /// (bool a, bool b) -> bool @@ -101,8 +104,7 @@ enum class OperationCode { LogicalXor, /// (bool a, bool b) -> bool LogicalNegate, /// (bool a) -> bool LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAll2, /// (bool2 a) -> bool - LogicalAny2, /// (bool2 a) -> bool + LogicalAnd2, /// (bool2 a) -> bool LogicalFLessThan, /// (float a, float b) -> bool LogicalFEqual, /// (float a, float b) -> bool diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index a53e02253..55f5949e4 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -59,8 +59,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co return TrackCbuf(source, code, new_cursor); } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { - if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) { + for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { + if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { // Cbuf found in operand. return found; } |