summary | refs | log | tree | commit | diff | stats
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r-- src/shader_recompiler/CMakeLists.txt 1
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv.cpp 13
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp 2
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp 35
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate_program.cpp 81
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate_program.h 9
-rw-r--r-- src/shader_recompiler/host_translate_info.h 3
-rw-r--r-- src/shader_recompiler/ir_opt/layer_pass.cpp 68
-rw-r--r-- src/shader_recompiler/ir_opt/passes.h 1
-rw-r--r-- src/shader_recompiler/shader_info.h 3
10 files changed, 196 insertions, 20 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index c173b2002..ef09fe2b9 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC
ir_opt/dual_vertex_pass.cpp
ir_opt/global_memory_to_storage_buffer_pass.cpp
ir_opt/identity_removal_pass.cpp
+ ir_opt/layer_pass.cpp
ir_opt/lower_fp16_to_fp32.cpp
ir_opt/lower_int64_to_int32.cpp
ir_opt/passes.h
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 265ac9c85..0f86a8004 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -402,8 +402,10 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
ctx.AddCapability(spv::Capability::SparseResidency);
}
if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) {
- ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
- ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
+ if (profile.supported_spirv < 0x00010600) {
+ ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
+ }
+ ctx.AddCapability(spv::Capability::DemoteToHelperInvocation);
}
if (info.stores[IR::Attribute::ViewportIndex]) {
ctx.AddCapability(spv::Capability::MultiViewport);
@@ -426,12 +428,11 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
info.uses_subgroup_shuffles) &&
profile.support_vote) {
- ctx.AddExtension("SPV_KHR_shader_ballot");
- ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
+ ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
+ ctx.AddCapability(spv::Capability::GroupNonUniformShuffle);
if (!profile.warp_size_potentially_larger_than_guest) {
// vote ops are only used when not taking the long path
- ctx.AddExtension("SPV_KHR_subgroup_vote");
- ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
+ ctx.AddCapability(spv::Capability::GroupNonUniformVote);
}
}
if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 7ad0b08ac..fb2c792c1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -12,7 +12,7 @@ void EmitJoin(EmitContext&) {
void EmitDemoteToHelperInvocation(EmitContext& ctx) {
if (ctx.profile.support_demote_to_helper_invocation) {
- ctx.OpDemoteToHelperInvocationEXT();
+ ctx.OpDemoteToHelperInvocation();
} else {
const Id kill_label{ctx.OpLabel()};
const Id impossible_label{ctx.OpLabel()};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 7cbbbfaa6..2c90f2368 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -6,6 +6,10 @@
namespace Shader::Backend::SPIRV {
namespace {
+// Returns the constant <id> holding spv::Scope::Subgroup. The OpGroupNonUniform* calls in
+// this file pass it as their scope operand.
+Id SubgroupScope(EmitContext& ctx) {
+    const u32 subgroup_scope{static_cast<u32>(spv::Scope::Subgroup)};
+    return ctx.Const(subgroup_scope);
+}
+
Id GetThreadId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
}
@@ -49,8 +53,9 @@ Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask
}
Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
- return ctx.OpSelect(ctx.U32[1], in_range,
- ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
+ return ctx.OpSelect(
+ ctx.U32[1], in_range,
+ ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
}
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
@@ -71,40 +76,46 @@ Id EmitLaneId(EmitContext& ctx) {
Id EmitVoteAll(EmitContext& ctx, Id pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
- return ctx.OpSubgroupAllKHR(ctx.U1, pred);
+ return ctx.OpGroupNonUniformAll(ctx.U1, SubgroupScope(ctx), pred);
}
- const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id mask_ballot{
+ ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), ctx.true_value)};
const Id active_mask{WarpExtract(ctx, mask_ballot)};
- const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id ballot{
+ WarpExtract(ctx, ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred))};
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
return ctx.OpIEqual(ctx.U1, lhs, active_mask);
}
Id EmitVoteAny(EmitContext& ctx, Id pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
- return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
+ return ctx.OpGroupNonUniformAny(ctx.U1, SubgroupScope(ctx), pred);
}
- const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id mask_ballot{
+ ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), ctx.true_value)};
const Id active_mask{WarpExtract(ctx, mask_ballot)};
- const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id ballot{
+ WarpExtract(ctx, ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred))};
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
}
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
- return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
+ return ctx.OpGroupNonUniformAllEqual(ctx.U1, SubgroupScope(ctx), pred);
}
- const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id mask_ballot{
+ ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), ctx.true_value)};
const Id active_mask{WarpExtract(ctx, mask_ballot)};
- const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id ballot{
+ WarpExtract(ctx, ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred))};
const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
ctx.OpIEqual(ctx.U1, lhs, active_mask));
}
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
- const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
+ const Id ballot{ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
}
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 376aae0ea..3adbd2b16 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -9,6 +9,7 @@
#include "common/settings.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/post_order.h"
#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate/translate.h"
@@ -233,6 +234,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::VerificationPass(program);
}
Optimization::CollectShaderInfoPass(env, program);
+ Optimization::LayerPass(program, host_info);
+
CollectInterpolationInfo(env, program);
AddNVNStorageBuffers(program);
return program;
@@ -331,4 +334,82 @@ void ConvertLegacyToGeneric(IR::Program& program, const Shader::RuntimeInfo& run
}
}
+// Builds a standalone geometry-stage program that forwards every input vertex to the output
+// and writes IR::Attribute::Layer from the attribute named by
+// source_program.info.emulated_layer.
+//
+// inst_pool, block_pool: pools the two IR blocks of the new program are created from.
+// host_info:             not read by this function.
+// source_program:        supplies the store mask (mirrored as both loads and stores here)
+//                        and the emulated layer attribute.
+// output_topology:       PointList emits 1 vertex, LineStrip 2, any other value 3.
+//
+// Returns the new program after block generation, post-order computation and SSA rewrite.
+IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
+                                        ObjectPool<IR::Block>& block_pool,
+                                        const HostTranslateInfo& host_info,
+                                        IR::Program& source_program,
+                                        Shader::OutputTopology output_topology) {
+    const IR::Attribute emulated_layer = source_program.info.emulated_layer;
+
+    IR::Program program;
+    program.stage = Stage::Geometry;
+    program.output_topology = output_topology;
+    if (output_topology == OutputTopology::PointList) {
+        program.output_vertices = 1;
+    } else if (output_topology == OutputTopology::LineStrip) {
+        program.output_vertices = 2;
+    } else {
+        program.output_vertices = 3;
+    }
+    program.is_geometry_passthrough = false;
+
+    // Inputs are exactly what the source stage stored. Outputs are the same set with the real
+    // layer attribute enabled and the emulated one cleared (in that order).
+    program.info.loads.mask = source_program.info.stores.mask;
+    program.info.stores.mask = source_program.info.stores.mask;
+    program.info.stores.Set(IR::Attribute::Layer, true);
+    program.info.stores.Set(emulated_layer, false);
+
+    IR::Block* const body_block{block_pool.Create(inst_pool)};
+    auto& body_node{program.syntax_list.emplace_back()};
+    body_node.type = IR::AbstractSyntaxNode::Type::Block;
+    body_node.data.block = body_block;
+
+    IR::IREmitter ir{*body_block};
+
+    // Copies the four consecutive components starting at `base` from input vertex `vertex`
+    // to the output, one load followed by one store per component.
+    const auto copy_vec4 = [&ir](IR::Attribute base, u32 vertex) {
+        for (u32 component = 0; component < 4; ++component) {
+            const IR::Attribute element = base + component;
+            const auto value = ir.GetAttribute(element, ir.Imm32(vertex));
+            ir.SetAttribute(element, value, ir.Imm32(0));
+        }
+    };
+
+    for (u32 vertex = 0; vertex < program.output_vertices; ++vertex) {
+        // Forward every generic that is flagged in the output store mask
+        for (u32 generic = 0; generic < 32; ++generic) {
+            if (program.info.stores.Generic(generic)) {
+                copy_vec4(IR::Attribute::Generic0X + (generic * 4), vertex);
+            }
+        }
+
+        // Forward position
+        copy_vec4(IR::Attribute::PositionX, vertex);
+
+        // Write the real layer from the emulated attribute.
+        // NOTE(review): no vertex index is passed to this load; which input vertex it reads
+        // is decided by GetAttribute's single-argument overload - confirm.
+        ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(emulated_layer), ir.Imm32(0));
+
+        ir.EmitVertex(ir.Imm32(0));
+    }
+    ir.EndPrimitive(ir.Imm32(0));
+
+    IR::Block* const return_block{block_pool.Create(inst_pool)};
+    IR::IREmitter{*return_block}.Epilogue();
+    body_block->AddBranch(return_block);
+
+    auto& return_node{program.syntax_list.emplace_back()};
+    return_node.type = IR::AbstractSyntaxNode::Type::Block;
+    return_node.data.block = return_block;
+    program.syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
+
+    program.blocks = GenerateBlocks(program.syntax_list);
+    program.post_order_blocks = PostOrder(program.syntax_list.front());
+    Optimization::SsaRewritePass(program);
+
+    return program;
+}
+
} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
index 02ede8c9c..497afe7cb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.h
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -25,4 +25,13 @@ namespace Shader::Maxwell {
void ConvertLegacyToGeneric(IR::Program& program, const RuntimeInfo& runtime_info);
+// Maxwell v1 and older Nvidia cards don't support setting gl_Layer from non-geometry stages.
+// This creates a workaround by setting the layer as a generic output and creating a
+// passthrough geometry shader that reads the generic and sets the layer.
+[[nodiscard]] IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool,
+ const HostTranslateInfo& host_info,
+ IR::Program& source_program,
+ Shader::OutputTopology output_topology);
+
} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index cc1500690..d5d279554 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -13,7 +13,8 @@ struct HostTranslateInfo {
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
- bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
+ bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
+ bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
};
} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/layer_pass.cpp b/src/shader_recompiler/ir_opt/layer_pass.cpp
new file mode 100644
index 000000000..4574f7cf2
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/layer_pass.cpp
@@ -0,0 +1,68 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include <bit>
+#include <optional>
+
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/breadth_first_search.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/ir_opt/passes.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+
+// Picks the attribute that will carry the layer value: the X component of the lowest-numbered
+// generic (0..31) that `stores` does not already flag. When all 32 generics are flagged,
+// IR::Attribute::Layer itself is returned.
+static IR::Attribute EmulatedLayerAttribute(VaryingState& stores) {
+    constexpr u32 num_generics{32};
+    u32 index{0};
+    while (index < num_generics && stores.Generic(index)) {
+        ++index;
+    }
+    if (index == num_generics) {
+        return IR::Attribute::Layer;
+    }
+    return IR::Attribute::Generic0X + (index * 4);
+}
+
+// True for the stages LayerPass is allowed to rewrite: both vertex stages and both
+// tessellation stages. Every other stage yields false.
+static bool PermittedProgramStage(Stage stage) {
+    return stage == Stage::VertexA || stage == Stage::VertexB ||
+           stage == Stage::TessellationControl || stage == Stage::TessellationEval;
+}
+
+// Redirects every SetAttribute write of IR::Attribute::Layer to the attribute chosen by
+// EmulatedLayerAttribute, then records the substitution in program.info.
+//
+// Does nothing when host_info.support_viewport_index_layer is set or when program.stage is
+// not accepted by PermittedProgramStage. program.info is only modified if at least one
+// instruction was rewritten.
+void LayerPass(IR::Program& program, const HostTranslateInfo& host_info) {
+    if (host_info.support_viewport_index_layer) {
+        return;
+    }
+    if (!PermittedProgramStage(program.stage)) {
+        return;
+    }
+
+    const IR::Attribute replacement = EmulatedLayerAttribute(program.info.stores);
+    bool rewrote_any{false};
+
+    for (const auto& block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            if (inst.GetOpcode() != IR::Opcode::SetAttribute) {
+                continue;
+            }
+            if (inst.Arg(0).Attribute() != IR::Attribute::Layer) {
+                continue;
+            }
+            inst.SetArg(0, IR::Value{replacement});
+            rewrote_any = true;
+        }
+    }
+
+    if (!rewrote_any) {
+        return;
+    }
+
+    // Publish the substitution: flag the emulation, remember which attribute stands in for the
+    // layer, and move the store bit from Layer to that attribute (clear first, then set).
+    program.info.requires_layer_emulation = true;
+    program.info.emulated_layer = replacement;
+    program.info.stores.Set(IR::Attribute::Layer, false);
+    program.info.stores.Set(replacement, true);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 586a0668f..11bfe801a 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -23,6 +23,7 @@ void RescalingPass(IR::Program& program);
void SsaRewritePass(IR::Program& program);
void PositionPass(Environment& env, IR::Program& program);
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
+void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
void VerificationPass(const IR::Program& program);
// Dual Vertex
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index ee6252bb5..d9c6e92db 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -204,6 +204,9 @@ struct Info {
u32 nvn_buffer_base{};
std::bitset<16> nvn_buffer_used{};
+ bool requires_layer_emulation{};
+ IR::Attribute emulated_layer{};
+
boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
constant_buffer_descriptors;
boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;