From fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 2 Jun 2019 18:52:07 -0300 Subject: shader: Split SSY and PBK stack Hardware testing revealed that SSY and PBK push to a different stack, allowing code like this: SSY label1; PBK label2; SYNC; label1: PBK; label2: EXIT; --- .../renderer_opengl/gl_shader_decompiler.cpp | 31 ++++++++++++-- .../renderer_vulkan/vk_shader_decompiler.cpp | 49 ++++++++++++++++------ src/video_core/shader/decode/other.cpp | 18 ++++---- src/video_core/shader/node.h | 7 +++- 4 files changed, 78 insertions(+), 27 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index f2d0722af..afcc06afc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -143,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) { return static_cast(index) - static_cast(Attribute::Index::Attribute_0); } +constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { + switch (stack) { + case MetaStackClass::Ssy: + return "ssy"; + case MetaStackClass::Pbk: + return "pbk"; + } + return {}; +} + +std::string FlowStackName(MetaStackClass stack) { + return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); +} + +std::string FlowStackTopName(MetaStackClass stack) { + return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); +} + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -173,8 +191,10 @@ public: // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. constexpr u32 FLOW_STACK_SIZE = 20; - code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); - code.AddLine("uint flow_stack_top = 0u;"); + for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { + code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); + code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); + } code.AddLine("while (true) {{"); ++code.scope; @@ -1438,15 +1458,18 @@ private: } std::string PushFlowStack(Operation operation) { + const auto stack = std::get(operation.GetMeta()); const auto target = std::get_if(&*operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); + code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), + target->GetValue()); return {}; } std::string PopFlowStack(Operation operation) { - code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); + const auto stack = std::get(operation.GetMeta()); + code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); code.AddLine("break;"); return {}; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 547883425..33ad9764a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -132,20 +132,16 @@ public: branch_labels.push_back(label); } - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), spv::StorageClass::Function, Constant(t_uint, first_address))); - flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type), - spv::StorageClass::Function, ConstantNull(flow_stack_type))); - flow_stack_top = - Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0))); + std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); + std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); Name(jmp_to, "jmp_to"); - Name(flow_stack, "flow_stack"); - Name(flow_stack_top, "flow_stack_top"); + Name(ssy_flow_stack, "ssy_flow_stack"); + Name(ssy_flow_stack_top, "ssy_flow_stack_top"); + Name(pbk_flow_stack, "pbk_flow_stack"); + Name(pbk_flow_stack_top, "pbk_flow_stack_top"); Emit(OpBranch(loop_label)); Emit(loop_label); @@ -952,6 +948,7 @@ private: const auto target = std::get_if(&*operation[0]); ASSERT(target); + const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); const Id current = Emit(OpLoad(t_uint, flow_stack_top)); const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1))); const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); @@ -962,6 +959,7 @@ private: } Id PopFlowStack(Operation operation) { + const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); const Id current = Emit(OpLoad(t_uint, flow_stack_top)); const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1))); const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); @@ -1172,6 +1170,31 @@ private: Emit(skip_label); } + std::tuple CreateFlowStack() { + // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely + // that shaders will use 20 nested SSYs and PBKs. + constexpr u32 FLOW_STACK_SIZE = 20; + constexpr auto storage_class = spv::StorageClass::Function; + + const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); + const Id stack = Emit(OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, + ConstantNull(flow_stack_type))); + const Id top = Emit(OpVariable(t_func_uint, storage_class, Constant(t_uint, 0))); + return std::tie(stack, top); + } + + std::pair GetFlowStack(Operation operation) { + const auto stack_class = std::get(operation.GetMeta()); + switch (stack_class) { + case MetaStackClass::Ssy: + return {ssy_flow_stack, ssy_flow_stack_top}; + case MetaStackClass::Pbk: + return {pbk_flow_stack, pbk_flow_stack_top}; + } + UNREACHABLE(); + return {}; + } + static constexpr OperationDecompilersArray operation_decompilers = { &SPIRVDecompiler::Assign, @@ -1414,8 +1437,10 @@ private: Id execute_function{}; Id jmp_to{}; - Id flow_stack_top{}; - Id flow_stack{}; + Id ssy_flow_stack_top{}; + Id pbk_flow_stack_top{}; + Id ssy_flow_stack{}; + Id pbk_flow_stack{}; Id continue_label{}; std::map labels; }; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 6fc07f213..d46a8ab82 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -109,22 +109,20 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer flow is not supported"); - // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the - // target of the jump that the SYNC instruction will make. The SSY opcode has a similar - // structure to the BRA opcode. + // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); + bb.push_back( + Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); break; } case OpCode::Id::PBK: { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer PBK is not supported"); - // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but - // using SYNC on a PBK address will kill the shader execution. We don't emulate this because - // it's very unlikely a driver will emit such invalid shader. + // PBK pushes to a stack the address where BRK will jump to. const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); + bb.push_back( + Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); break; } case OpCode::Id::SYNC: { @@ -133,7 +131,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { static_cast(cc)); // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack)); + bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); break; } case OpCode::Id::BRK: { @@ -142,7 +140,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { static_cast(cc)); // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack)); + bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); break; } case OpCode::Id::IPA: { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c002f90f9..3cfb911bb 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -174,6 +174,11 @@ enum class InternalFlag { Amount = 4, }; +enum class MetaStackClass { + Ssy, + Pbk, +}; + class OperationNode; class ConditionalNode; class GprNode; @@ -285,7 +290,7 @@ struct MetaTexture { }; /// Parameters that modify an operation but are not part of any particular operand -using Meta = std::variant; +using Meta = std::variant; /// Holds any kind of operation that can be done in the IR class OperationNode final { -- cgit v1.2.3