Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt | 5
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 10
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_primitive_assembler.cpp | 63
-rw-r--r--  src/video_core/renderer_opengl/gl_primitive_assembler.h | 31
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 51
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 120
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.cpp | 46
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.cpp | 11
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.h | 8
-rw-r--r--  src/video_core/renderer_opengl/gl_state.cpp | 13
-rw-r--r--  src/video_core/renderer_opengl/gl_state.h | 7
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 9
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp | 136
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.h | 58
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 152
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 8
-rw-r--r--  src/video_core/shader/decode.cpp | 1
-rw-r--r--  src/video_core/shader/decode/arithmetic.cpp | 1
-rw-r--r--  src/video_core/shader/decode/arithmetic_half.cpp | 1
-rw-r--r--  src/video_core/shader/decode/arithmetic_half_immediate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/arithmetic_immediate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer.cpp | 1
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/bfe.cpp | 1
-rw-r--r--  src/video_core/shader/decode/bfi.cpp | 1
-rw-r--r--  src/video_core/shader/decode/conversion.cpp | 1
-rw-r--r--  src/video_core/shader/decode/ffma.cpp | 1
-rw-r--r--  src/video_core/shader/decode/float_set.cpp | 1
-rw-r--r--  src/video_core/shader/decode/float_set_predicate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/half_set.cpp | 1
-rw-r--r--  src/video_core/shader/decode/half_set_predicate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/hfma2.cpp | 1
-rw-r--r--  src/video_core/shader/decode/integer_set.cpp | 1
-rw-r--r--  src/video_core/shader/decode/integer_set_predicate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 9
-rw-r--r--  src/video_core/shader/decode/other.cpp | 1
-rw-r--r--  src/video_core/shader/decode/predicate_set_predicate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/predicate_set_register.cpp | 1
-rw-r--r--  src/video_core/shader/decode/register_set_predicate.cpp | 1
-rw-r--r--  src/video_core/shader/decode/shift.cpp | 1
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 9
-rw-r--r--  src/video_core/shader/decode/video.cpp | 1
-rw-r--r--  src/video_core/shader/decode/xmad.cpp | 1
-rw-r--r--  src/video_core/shader/node.h | 514
-rw-r--r--  src/video_core/shader/node_helper.cpp | 99
-rw-r--r--  src/video_core/shader/node_helper.h | 65
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 102
-rw-r--r--  src/video_core/shader/shader_ir.h | 551
-rw-r--r--  src/video_core/shader/track.cpp | 18
-rw-r--r--  src/video_core/textures/texture.h | 2
53 files changed, 1072 insertions(+), 1061 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 1e010e4da..2d4caa08d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -42,8 +42,6 @@ add_library(video_core STATIC
renderer_opengl/gl_device.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
- renderer_opengl/gl_primitive_assembler.cpp
- renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
@@ -102,6 +100,9 @@ add_library(video_core STATIC
shader/decode/xmad.cpp
shader/decode/other.cpp
shader/decode.cpp
+ shader/node_helper.cpp
+ shader/node_helper.h
+ shader/node.h
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 25652e794..48b86f3bd 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -71,16 +71,6 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s
return uploaded_offset;
}
-std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) {
- AlignBuffer(alignment);
- u8* const uploaded_ptr = buffer_ptr;
- const GLintptr uploaded_offset = buffer_offset;
-
- buffer_ptr += size;
- buffer_offset += size;
- return std::make_tuple(uploaded_ptr, uploaded_offset);
-}
-
bool OGLBufferCache::Map(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f9247a40e..f2347581b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -61,9 +61,6 @@ public:
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
- /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
- std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
-
bool Map(std::size_t max_size);
void Unmap();
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
deleted file mode 100644
index c3e94d917..000000000
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/memory_manager.h"
-#include "video_core/renderer_opengl/gl_buffer_cache.h"
-#include "video_core/renderer_opengl/gl_primitive_assembler.h"
-
-namespace OpenGL {
-
-constexpr u32 TRIANGLES_PER_QUAD = 6;
-constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
-
-PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
-
-PrimitiveAssembler::~PrimitiveAssembler() = default;
-
-std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
- ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");
- return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint);
-}
-
-GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
- const std::size_t size{CalculateQuadSize(count)};
- auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size);
-
- for (u32 primitive = 0; primitive < count / 4; ++primitive) {
- for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) {
- const u32 index = first + primitive * 4 + QUAD_MAP[i];
- std::memcpy(dst_pointer, &index, sizeof(index));
- dst_pointer += sizeof(index);
- }
- }
-
- return index_offset;
-}
-
-GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
- const std::size_t map_size{CalculateQuadSize(count)};
- auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
-
- auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const u8* source{memory_manager.GetPointer(gpu_addr)};
-
- for (u32 primitive = 0; primitive < count / 4; ++primitive) {
- for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
- const u32 index = primitive * 4 + QUAD_MAP[i];
- const u8* src_offset = source + (index * index_size);
-
- std::memcpy(dst_pointer, src_offset, index_size);
- dst_pointer += index_size;
- }
- }
-
- return index_offset;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
deleted file mode 100644
index 4e87ce4d6..000000000
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-
-namespace OpenGL {
-
-class OGLBufferCache;
-
-class PrimitiveAssembler {
-public:
- explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
- ~PrimitiveAssembler();
-
- /// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
- std::size_t CalculateQuadSize(u32 count) const;
-
- GLintptr MakeQuadArray(u32 first, u32 count);
-
- GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
-
-private:
- OGLBufferCache& buffer_cache;
-};
-
-} // namespace OpenGL
\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f9b6dfeea..ca410287a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -246,29 +246,6 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
DrawParameters params{};
params.current_instance = gpu.state.current_instance;
- if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
- MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);
-
- params.use_indexed = true;
- params.primitive_mode = GL_TRIANGLES;
-
- if (is_indexed) {
- params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
- params.count = (regs.index_array.count / 4) * 6;
- params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
- regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
- regs.index_array.count);
- params.base_vertex = static_cast<GLint>(regs.vb_element_base);
- } else {
- // MakeQuadArray always generates u32 indexes
- params.index_format = GL_UNSIGNED_INT;
- params.count = (regs.vertex_buffer.count / 4) * 6;
- params.index_buffer_offset = primitive_assembler.MakeQuadArray(
- regs.vertex_buffer.first, regs.vertex_buffer.count);
- }
- return params;
- }
-
params.use_indexed = is_indexed;
params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
@@ -686,30 +663,19 @@ void RasterizerOpenGL::DrawArrays() {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest(state);
- // Alpha Testing is synced on shaders.
SyncTransformFeedback();
SyncPointState();
- CheckAlphaTests();
SyncPolygonOffset();
- // TODO(bunnei): Sync framebuffer_scale uniform here
- // TODO(bunnei): Sync scissorbox uniform(s) here
+ SyncAlphaTest();
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
std::size_t buffer_size = CalculateVertexArraysSize();
- // Add space for index buffer (keeping in mind non-core primitives)
- switch (regs.draw.topology) {
- case Maxwell::PrimitiveTopology::Quads:
- buffer_size = Common::AlignUp(buffer_size, 4) +
- primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
- break;
- default:
- if (is_indexed) {
- buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
- }
- break;
+ // Add space for index buffer
+ if (is_indexed) {
+ buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
}
// Uniform space for the 5 shader stages
@@ -1152,10 +1118,17 @@ void RasterizerOpenGL::SyncPolygonOffset() {
state.polygon_offset.clamp = regs.polygon_offset_clamp;
}
-void RasterizerOpenGL::CheckAlphaTests() {
+void RasterizerOpenGL::SyncAlphaTest() {
const auto& regs = system.GPU().Maxwell3D().regs;
UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
"Alpha Testing is enabled with more than one rendertarget");
+
+ state.alpha_test.enabled = regs.alpha_test_enabled;
+ if (!state.alpha_test.enabled) {
+ return;
+ }
+ state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func);
+ state.alpha_test.ref = regs.alpha_test_ref;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d78094138..2817f65c9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,7 +23,6 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
-#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
@@ -167,8 +166,8 @@ private:
/// Syncs the polygon offsets
void SyncPolygonOffset();
- /// Check asserts for alpha testing.
- void CheckAlphaTests();
+ /// Syncs the alpha test state to match the guest state
+ void SyncAlphaTest();
/// Check for extension that are not strictly required
/// but are needed for correct emulation
@@ -197,7 +196,6 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
- PrimitiveAssembler primitive_assembler{buffer_cache};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3b61bf77f..739477cc9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -45,7 +45,6 @@ struct TextureAoffi {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
-enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
@@ -124,8 +123,8 @@ bool IsPrecise(Operation operand) {
return false;
}
-bool IsPrecise(Node node) {
- if (const auto operation = std::get_if<OperationNode>(node)) {
+bool IsPrecise(const Node& node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
return IsPrecise(*operation);
}
return false;
@@ -247,6 +246,12 @@ private:
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
code.AddNewLine();
+ code.AddLine("in gl_PerVertex {{");
+ ++code.scope;
+ code.AddLine("vec4 gl_Position;");
+ --code.scope;
+ code.AddLine("}} gl_in[];");
+
DeclareVertexRedeclarations();
}
@@ -349,7 +354,7 @@ private:
}
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
- const u32 generic_index{GetGenericAttributeIndex(index)};
+ const u32 location{GetGenericAttributeIndex(index)};
std::string name{GetInputAttribute(index)};
if (stage == ShaderStage::Geometry) {
@@ -358,19 +363,13 @@ private:
std::string suffix;
if (stage == ShaderStage::Fragment) {
- const auto input_mode{header.ps.GetAttributeUse(generic_index)};
+ const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
return;
}
suffix = GetInputFlags(input_mode);
}
- u32 location = generic_index;
- if (stage != ShaderStage::Vertex) {
- // If inputs are varyings, add an offset
- location += GENERIC_VARYING_START_LOCATION;
- }
-
code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
}
@@ -395,7 +394,7 @@ private:
}
void DeclareOutputAttribute(Attribute::Index index) {
- const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
+ const u32 location{GetGenericAttributeIndex(index)};
code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
}
@@ -498,15 +497,15 @@ private:
}
void VisitBlock(const NodeBlock& bb) {
- for (const Node node : bb) {
+ for (const auto& node : bb) {
if (const std::string expr = Visit(node); !expr.empty()) {
code.AddLine(expr);
}
}
}
- std::string Visit(Node node) {
- if (const auto operation = std::get_if<OperationNode>(node)) {
+ std::string Visit(const Node& node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -520,7 +519,7 @@ private:
return (this->*decompiler)(*operation);
}
- if (const auto gpr = std::get_if<GprNode>(node)) {
+ if (const auto gpr = std::get_if<GprNode>(&*node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
return "0";
@@ -528,7 +527,7 @@ private:
return GetRegister(index);
}
- if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
// For eyecandy avoid using hex numbers on single digits
@@ -537,7 +536,7 @@ private:
return fmt::format("utof(0x{:x}u)", immediate->GetValue());
}
- if (const auto predicate = std::get_if<PredicateNode>(node)) {
+ if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
const auto value = [&]() -> std::string {
switch (const auto index = predicate->GetIndex(); index) {
case Tegra::Shader::Pred::UnusedIndex:
@@ -554,7 +553,7 @@ private:
return value;
}
- if (const auto abuf = std::get_if<AbufNode>(node)) {
+ if (const auto abuf = std::get_if<AbufNode>(&*node)) {
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
@@ -564,9 +563,9 @@ private:
return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
}
- if (const auto cbuf = std::get_if<CbufNode>(node)) {
+ if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
- if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
@@ -602,22 +601,22 @@ private:
UNREACHABLE_MSG("Unmanaged offset node type");
}
- if (const auto gmem = std::get_if<GmemNode>(node)) {
+ if (const auto gmem = std::get_if<GmemNode>(&*node)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
}
- if (const auto lmem = std::get_if<LmemNode>(node)) {
+ if (const auto lmem = std::get_if<LmemNode>(&*node)) {
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
}
- if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
+ if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
return GetInternalFlag(internal_flag->GetFlag());
}
- if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
code.AddLine("if ({}) {{", Visit(conditional->GetCondition()));
++code.scope;
@@ -629,7 +628,7 @@ private:
return {};
}
- if (const auto comment = std::get_if<CommentNode>(node)) {
+ if (const auto comment = std::get_if<CommentNode>(&*node)) {
return "// " + comment->GetText();
}
@@ -637,7 +636,7 @@ private:
return {};
}
- std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) {
+ std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
if (stage == ShaderStage::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
@@ -650,10 +649,14 @@ private:
switch (attribute) {
case Attribute::Index::Position:
- if (stage != ShaderStage::Fragment) {
- return GeometryPass("position") + GetSwizzle(element);
- } else {
+ switch (stage) {
+ case ShaderStage::Geometry:
+ return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
+ GetSwizzle(element));
+ case ShaderStage::Fragment:
return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
+ default:
+ UNREACHABLE();
}
case Attribute::Index::PointCoord:
switch (element) {
@@ -869,7 +872,7 @@ private:
std::string expr = ", ";
switch (type) {
case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
@@ -901,7 +904,7 @@ private:
for (std::size_t index = 0; index < aoffi.size(); ++index) {
const auto operand{aoffi.at(index)};
- if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
@@ -922,23 +925,23 @@ private:
}
std::string Assign(Operation operation) {
- const Node dest = operation[0];
- const Node src = operation[1];
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
std::string target;
- if (const auto gpr = std::get_if<GprNode>(dest)) {
+ if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
// Writing to Register::ZeroIndex is a no op
return {};
}
target = GetRegister(gpr->GetIndex());
- } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+ } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
target = [&]() -> std::string {
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
case Attribute::Index::Position:
- return "position"s + GetSwizzle(abuf->GetElement());
+ return "gl_Position"s + GetSwizzle(abuf->GetElement());
case Attribute::Index::PointSize:
return "gl_PointSize";
case Attribute::Index::ClipDistances0123:
@@ -954,9 +957,9 @@ private:
return "0";
}
}();
- } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
+ } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
- } else if (const auto gmem = std::get_if<GmemNode>(dest)) {
+ } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
@@ -1233,12 +1236,12 @@ private:
}
std::string LogicalAssign(Operation operation) {
- const Node dest = operation[0];
- const Node src = operation[1];
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
std::string target;
- if (const auto pred = std::get_if<PredicateNode>(dest)) {
+ if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
const auto index = pred->GetIndex();
@@ -1249,7 +1252,7 @@ private:
return {};
}
target = GetPredicate(index);
- } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
+ } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
target = GetInternalFlag(flag->GetFlag());
}
@@ -1426,7 +1429,7 @@ private:
}
std::string Branch(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(operation[0]);
+ const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
code.AddLine("jmp_to = 0x{:x}u;", target->GetValue());
@@ -1435,7 +1438,7 @@ private:
}
std::string PushFlowStack(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(operation[0]);
+ const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue());
@@ -1464,27 +1467,9 @@ private:
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
- code.AddLine("if (alpha_test[0] != 0) {{");
- ++code.scope;
- // We start on the register containing the alpha value in the first RT.
- u32 current_reg = 3;
- for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
- // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when
- // multiple render targets are used.
- if (header.ps.IsColorComponentOutputEnabled(render_target, 0) ||
- header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
- header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
- header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
- code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg));
- current_reg += 4;
- }
- }
- --code.scope;
- code.AddLine("}}");
-
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
- current_reg = 0;
+ u32 current_reg = 0;
for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
@@ -1523,9 +1508,7 @@ private:
// If a geometry shader is attached, it will always flip (it's the last stage before
// fragment). For more info about flipping, refer to gl_shader_gen.cpp.
- code.AddLine("position.xy *= viewport_flip.xy;");
- code.AddLine("gl_Position = position;");
- code.AddLine("position.w = 1.0;");
+ code.AddLine("gl_Position.xy *= viewport_flip.xy;");
code.AddLine("EmitVertex();");
return {};
}
@@ -1763,8 +1746,7 @@ private:
}
u32 GetNumPhysicalVaryings() const {
- return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION,
- Maxwell::NumVaryings);
+ return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
const Device& device;
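The repeated change from std::get_if<...>(node) to std::get_if<...>(&*node) in the hunks above follows from Node becoming a smart pointer to the variant rather than a plain pointer, which is why call sites now also take const Node& instead of copying. A minimal standalone sketch of the pattern (type names are simplified stand-ins; the real definitions live in the new shader/node.h, which is not part of this hunk):

#include <memory>
#include <variant>

// Simplified stand-ins for the shader IR node types.
struct GprNode { unsigned index; };
struct ImmediateNode { unsigned value; };
using NodeData = std::variant<GprNode, ImmediateNode>;
using Node = std::shared_ptr<NodeData>; // assumption: Node owns the variant

// std::get_if expects a pointer to the variant itself, so with Node now being
// a smart pointer the call sites dereference it first: &*node yields NodeData*.
bool IsImmediate(const Node& node) {
    return std::get_if<ImmediateNode>(&*node) != nullptr;
}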
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index d2bb705a9..9148629ec 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -23,12 +23,9 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu
out += GetCommonDeclarations();
out += R"(
-layout (location = 0) out vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
- uvec4 alpha_test;
};
)";
@@ -48,7 +45,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
out += R"(
void main() {
- position = vec4(0.0, 0.0, 0.0, 0.0);
execute_vertex();
)";
@@ -59,19 +55,12 @@ void main() {
out += R"(
// Set Position Y direction
- position.y *= utof(config_pack[2]);
+ gl_Position.y *= utof(config_pack[2]);
// Check if the flip stage is VertexB
// Config pack's second value is flip_stage
if (config_pack[1] == 1) {
// Viewport can be flipped, which is unsupported by glViewport
- position.xy *= viewport_flip.xy;
- }
- gl_Position = position;
-
- // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
- // For now, this is here to bring order in lieu of proper emulation
- if (config_pack[1] == 1) {
- position.w = 1.0;
+ gl_Position.xy *= viewport_flip.xy;
}
})";
@@ -85,13 +74,9 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
out += GetCommonDeclarations();
out += R"(
-layout (location = 0) in vec4 gs_position[];
-layout (location = 0) out vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
- uvec4 alpha_test;
};
)";
@@ -124,38 +109,11 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
-layout (location = 0) in noperspective vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
- uvec4 alpha_test;
};
-bool AlphaFunc(in float value) {
- float ref = uintBitsToFloat(alpha_test[2]);
- switch (alpha_test[1]) {
- case 1:
- return false;
- case 2:
- return value < ref;
- case 3:
- return value == ref;
- case 4:
- return value <= ref;
- case 5:
- return value > ref;
- case 6:
- return value != ref;
- case 7:
- return value >= ref;
- case 8:
- return true;
- default:
- return false;
- }
-}
-
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 05ab01dcb..b05f90f20 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -48,17 +48,6 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
- auto func{static_cast<u32>(regs.alpha_test_func)};
- // Normalize the gl variants of opCompare to be the same as the normal variants
- const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
- if (func >= op_gl_variant_base) {
- func = func - op_gl_variant_base + 1U;
- }
-
- alpha_test.enabled = regs.alpha_test_enabled;
- alpha_test.func = func;
- alpha_test.ref = regs.alpha_test_ref;
-
instance_id = state.current_instance;
// Assign in which stage the position has to be flipped
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index cec18a832..6961e702a 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -27,14 +27,8 @@ struct MaxwellUniformData {
GLuint flip_stage;
GLfloat y_direction;
};
- struct alignas(16) {
- GLuint enabled;
- GLuint func;
- GLfloat ref;
- GLuint padding;
- } alpha_test;
};
-static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 7425fbe5d..d86e137ac 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -156,6 +156,10 @@ OpenGLState::OpenGLState() {
polygon_offset.factor = 0.0f;
polygon_offset.units = 0.0f;
polygon_offset.clamp = 0.0f;
+
+ alpha_test.enabled = false;
+ alpha_test.func = GL_ALWAYS;
+ alpha_test.ref = 0.0f;
}
void OpenGLState::ApplyDefaultState() {
@@ -461,6 +465,14 @@ void OpenGLState::ApplyPolygonOffset() const {
}
}
+void OpenGLState::ApplyAlphaTest() const {
+ Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
+ if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
+ std::tie(alpha_test.func, alpha_test.ref))) {
+ glAlphaFunc(alpha_test.func, alpha_test.ref);
+ }
+}
+
void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
@@ -533,6 +545,7 @@ void OpenGLState::Apply() const {
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
+ ApplyAlphaTest();
}
void OpenGLState::EmulateViewportWithScissor() {
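ApplyAlphaTest above goes through yuzu's Enable/UpdateTie helpers; the raw compatibility-profile GL calls it reduces to look roughly like this (illustrative sketch, example values only):

#include <glad/glad.h>

// Fixed-function alpha test: discard fragments whose alpha fails the compare.
void ApplyAlphaTestRaw(bool enabled, GLenum func, GLfloat ref) {
    if (enabled) {
        glEnable(GL_ALPHA_TEST);
        glAlphaFunc(func, ref); // e.g. GL_GREATER with ref = 0.5f
    } else {
        glDisable(GL_ALPHA_TEST);
    }
}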
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 41418a7b8..b0140495d 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -172,6 +172,12 @@ public:
GLfloat clamp;
} polygon_offset;
+ struct {
+ bool enabled; // GL_ALPHA_TEST
+ GLenum func; // GL_ALPHA_TEST_FUNC
+ GLfloat ref; // GL_ALPHA_TEST_REF
+ } alpha_test;
+
std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE
OpenGLState();
@@ -215,6 +221,7 @@ public:
void ApplySamplers() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
+ void ApplyAlphaTest() const;
/// Set the initial OpenGL state
static void ApplyDefaultState();
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index ed7b5cff0..ea77dd211 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -128,6 +128,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return GL_TRIANGLE_FAN;
+ case Maxwell::PrimitiveTopology::Quads:
+ return GL_QUADS;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
@@ -173,11 +175,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
return GL_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::Border:
return GL_CLAMP_TO_BORDER;
- case Tegra::Texture::WrapMode::ClampOGL:
- // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
- // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
- // manually mix them. However the shader part of this is not yet implemented.
- return GL_CLAMP_TO_BORDER;
+ case Tegra::Texture::WrapMode::Clamp:
+ return GL_CLAMP;
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return GL_MIRROR_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::MirrorOnceBorder:
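Mapping Quads straight to GL_QUADS is what lets the PrimitiveAssembler removed above go away: instead of expanding each quad into six triangle indices on the CPU, the topology is handed to the driver directly. A toy version of the idea (compatibility-profile GL, simplified enum, not the real MaxwellToGL signature):

#include <glad/glad.h>

enum class Topology { Triangles, TriangleStrip, Quads };

GLenum ToGLPrimitive(Topology topology) {
    switch (topology) {
    case Topology::Triangles:
        return GL_TRIANGLES;
    case Topology::TriangleStrip:
        return GL_TRIANGLE_STRIP;
    case Topology::Quads:
        return GL_QUADS; // previously expanded to 6 indices per quad on the CPU
    }
    return GL_TRIANGLES;
}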
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 9fe1e3280..0bbbf6851 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -52,7 +52,7 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
return vk::SamplerAddressMode::eClampToEdge;
case Tegra::Texture::WrapMode::Border:
return vk::SamplerAddressMode::eClampToBorder;
- case Tegra::Texture::WrapMode::ClampOGL:
+ case Tegra::Texture::WrapMode::Clamp:
// TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
// eClampToBorder to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 00242ecbe..3b966ddc3 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
+constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
} // namespace Alternatives
@@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface);
SetupProperties(dldi);
+ SetupFeatures(dldi);
}
VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
- const auto queue_cis = GetDeviceQueueCreateInfos();
- vk::PhysicalDeviceFeatures device_features{};
+ vk::PhysicalDeviceFeatures device_features;
+ device_features.vertexPipelineStoresAndAtomics = true;
+ device_features.independentBlend = true;
+ device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
- const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
+ const auto queue_cis = GetDeviceQueueCreateInfos();
+ const std::vector<const char*> extensions = LoadExtensions(dldi);
const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
0, nullptr, static_cast<u32>(extensions.size()),
extensions.data(), &device_features);
@@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
LOG_CRITICAL(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
- static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+ vk::to_string(wanted_format), vk::to_string(wanted_usage),
static_cast<u32>(format_type));
UNREACHABLE();
return wanted_format;
@@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
return wanted_format;
}
+bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
+ const vk::DispatchLoaderDynamic& dldi) const {
+ if (!features.textureCompressionASTC_LDR) {
+ return false;
+ }
+ const auto format_feature_usage{
+ vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
+ vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
+ vk::FormatFeatureFlagBits::eTransferDst};
+ constexpr std::array<vk::Format, 9> astc_formats = {
+ vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
+ vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
+ vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
+ vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock,
+ vk::Format::eAstc10x8SrgbBlock};
+ for (const auto format : astc_formats) {
+ const auto format_properties{physical.getFormatProperties(format, dldi)};
+ if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
+ return false;
+ }
+ }
+ return true;
+}
+
bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
@@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
- const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
-
bool has_swapchain{};
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
- has_swapchain |= prop.extensionName == swapchain_extension;
+ has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
}
if (!has_swapchain) {
// The device doesn't support creating swapchains.
@@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
}
// TODO(Rodrigo): Check if the device matches all requeriments.
- const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
- if (props.limits.maxUniformBufferRange < 65536) {
+ const auto properties{physical.getProperties(dldi)};
+ const auto limits{properties.limits};
+ if (limits.maxUniformBufferRange < 65536) {
+ return false;
+ }
+
+ const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
+ if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
return false;
}
@@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
return true;
}
+std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
+ std::vector<const char*> extensions;
+ extensions.reserve(2);
+ extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+
+ const auto Test = [&](const vk::ExtensionProperties& extension,
+ std::optional<std::reference_wrapper<bool>> status, const char* name,
+ u32 revision) {
+ if (extension.extensionName != std::string(name)) {
+ return;
+ }
+ extensions.push_back(name);
+ if (status) {
+ status->get() = true;
+ }
+ };
+
+ for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+ Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
+ }
+
+ return extensions;
+}
+
void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
std::optional<u32> graphics_family_, present_family_;
@@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
+ max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
+}
+
+void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
+ const auto supported_features{physical.getFeatures(dldi)};
+ is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
- static const float QUEUE_PRIORITY = 1.f;
+ static const float QUEUE_PRIORITY = 1.0f;
std::set<u32> unique_queue_families = {graphics_family, present_family};
std::vector<vk::DeviceQueueCreateInfo> queue_cis;
@@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
+ static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+ vk::Format::eB5G6R5UnormPack16,
+ vk::Format::eA2B10G10R10UnormPack32,
+ vk::Format::eR32G32B32A32Sfloat,
+ vk::Format::eR16G16Unorm,
+ vk::Format::eR16G16Snorm,
+ vk::Format::eR8G8B8A8Srgb,
+ vk::Format::eR8Unorm,
+ vk::Format::eB10G11R11UfloatPack32,
+ vk::Format::eR32Sfloat,
+ vk::Format::eR16Sfloat,
+ vk::Format::eR16G16B16A16Sfloat,
+ vk::Format::eD32Sfloat,
+ vk::Format::eD16Unorm,
+ vk::Format::eD16UnormS8Uint,
+ vk::Format::eD24UnormS8Uint,
+ vk::Format::eD32SfloatS8Uint,
+ vk::Format::eBc1RgbaUnormBlock,
+ vk::Format::eBc2UnormBlock,
+ vk::Format::eBc3UnormBlock,
+ vk::Format::eBc4UnormBlock,
+ vk::Format::eBc5UnormBlock,
+ vk::Format::eBc5SnormBlock,
+ vk::Format::eBc7UnormBlock,
+ vk::Format::eAstc4x4UnormBlock,
+ vk::Format::eAstc4x4SrgbBlock,
+ vk::Format::eAstc8x8SrgbBlock,
+ vk::Format::eAstc8x6SrgbBlock,
+ vk::Format::eAstc5x4SrgbBlock,
+ vk::Format::eAstc5x5UnormBlock,
+ vk::Format::eAstc5x5SrgbBlock,
+ vk::Format::eAstc10x8UnormBlock,
+ vk::Format::eAstc10x8SrgbBlock};
std::map<vk::Format, vk::FormatProperties> format_properties;
-
- const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
+ for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
- };
- AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
- AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
- AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
- AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
- AddFormatQuery(vk::Format::eR8Unorm);
- AddFormatQuery(vk::Format::eD32Sfloat);
- AddFormatQuery(vk::Format::eD16Unorm);
- AddFormatQuery(vk::Format::eD16UnormS8Uint);
- AddFormatQuery(vk::Format::eD24UnormS8Uint);
- AddFormatQuery(vk::Format::eD32SfloatS8Uint);
- AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
- AddFormatQuery(vk::Format::eBc2UnormBlock);
- AddFormatQuery(vk::Format::eBc3UnormBlock);
- AddFormatQuery(vk::Format::eBc4UnormBlock);
-
+ }
return format_properties;
}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index e87c7a508..537825d8b 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -11,7 +11,7 @@
namespace Vulkan {
-/// Format usage descriptor
+/// Format usage descriptor.
enum class FormatType { Linear, Optimal, Buffer };
/// Handles data specific to a physical device.
@@ -34,12 +34,12 @@ public:
vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const;
- /// Returns the dispatch loader with direct function pointers of the device
+ /// Returns the dispatch loader with direct function pointers of the device.
const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
return dld;
}
- /// Returns the logical device
+ /// Returns the logical device.
vk::Device GetLogical() const {
return logical.get();
}
@@ -69,30 +69,55 @@ public:
return present_family;
}
- /// Returns if the device is integrated with the host CPU
+ /// Returns if the device is integrated with the host CPU.
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
}
- /// Returns uniform buffer alignment requeriment
+ /// Returns uniform buffer alignment requeriment.
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
+ /// Returns the maximum range for storage buffers.
+ u64 GetMaxStorageBufferRange() const {
+ return max_storage_buffer_range;
+ }
+
+ /// Returns true if ASTC is natively supported.
+ bool IsOptimalAstcSupported() const {
+ return is_optimal_astc_supported;
+ }
+
+ /// Returns true if the device supports VK_EXT_scalar_block_layout.
+ bool IsExtScalarBlockLayoutSupported() const {
+ return ext_scalar_block_layout;
+ }
+
/// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
private:
+ /// Loads extensions into a vector and stores available ones in this object.
+ std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
+
/// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
/// Sets up device properties.
void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
+ /// Sets up device features.
+ void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
+
/// Returns a list of queue initialization descriptors.
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
+ /// Returns true if ASTC textures are natively supported.
+ bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
+ const vk::DispatchLoaderDynamic& dldi) const;
+
/// Returns true if a format is supported.
bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const;
@@ -101,16 +126,19 @@ private:
static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
- const vk::PhysicalDevice physical; ///< Physical device
- vk::DispatchLoaderDynamic dld; ///< Device function pointers
- UniqueDevice logical; ///< Logical device
- vk::Queue graphics_queue; ///< Main graphics queue
- vk::Queue present_queue; ///< Main present queue
- u32 graphics_family{}; ///< Main graphics queue family index
- u32 present_family{}; ///< Main present queue family index
- vk::PhysicalDeviceType device_type; ///< Physical device type
- u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment
- std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
+ const vk::PhysicalDevice physical; ///< Physical device.
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers.
+ UniqueDevice logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ vk::PhysicalDeviceType device_type; ///< Physical device type.
+ u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
+ u64 max_storage_buffer_range{}; ///< Max storage buffer size.
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout.
+ std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
};
} // namespace Vulkan
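The vk_shader_decompiler.cpp changes below choose between two constant-buffer addressing schemes depending on IsExtScalarBlockLayoutSupported: under std140 the buffer is treated as an array of vec4s with a 16-byte stride, while VK_EXT_scalar_block_layout allows a flat float array indexed by the byte offset divided by four. The arithmetic, as a self-contained sketch (names and struct are illustrative only):

#include <cstdint>

struct CbufIndex {
    std::uint32_t vec4_index; // which vec4 (std140 path)
    std::uint32_t element;    // which component inside that vec4
    std::uint32_t flat_index; // direct float index (scalar block layout path)
};

constexpr CbufIndex IndexCbuf(std::uint32_t offset) {
    return CbufIndex{offset / 16, (offset / 4) % 4, offset >> 2};
}

// Byte offset 36 lands in vec4 #2, component 1, or float #9 when flat.
static_assert(IndexCbuf(36).vec4_index == 2 && IndexCbuf(36).element == 1 &&
              IndexCbuf(36).flat_index == 9);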
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a5b25aeff..547883425 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -17,6 +17,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
+#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
@@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&;
// TODO(Rodrigo): Use rasterizer's value
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000;
+constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000;
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4;
constexpr u32 STAGE_BINDING_STRIDE = 0x100;
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler : public Sirit::Module {
public:
- explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage)
- : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} {
+ explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage)
+ : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} {
AddCapability(spv::Capability::Shader);
AddExtension("SPV_KHR_storage_buffer_storage_class");
AddExtension("SPV_KHR_variable_pointers");
@@ -195,7 +197,9 @@ public:
entries.samplers.emplace_back(sampler);
}
for (const auto& attribute : ir.GetInputAttributes()) {
- entries.attributes.insert(GetGenericAttributeLocation(attribute));
+ if (IsGenericAttribute(attribute)) {
+ entries.attributes.insert(GetGenericAttributeLocation(attribute));
+ }
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -210,7 +214,6 @@ private:
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
- static constexpr u32 CBUF_STRIDE = 16;
void AllocateBindings() {
const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
@@ -315,6 +318,7 @@ private:
constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
"overflow"};
for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
+ const auto flag_code = static_cast<InternalFlag>(flag);
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
}
@@ -374,7 +378,9 @@ private:
u32 binding = const_buffers_base_binding;
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
- const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform);
+ const Id type =
+ device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
+ const Id id = OpVariable(type, spv::StorageClass::Uniform);
AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
Decorate(id, spv::Decoration::Binding, binding++);
@@ -475,13 +481,13 @@ private:
}
void VisitBasicBlock(const NodeBlock& bb) {
- for (const Node node : bb) {
+ for (const auto& node : bb) {
static_cast<void>(Visit(node));
}
}
- Id Visit(Node node) {
- if (const auto operation = std::get_if<OperationNode>(node)) {
+ Id Visit(const Node& node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
@@ -489,17 +495,17 @@ private:
}
return (this->*decompiler)(*operation);
- } else if (const auto gpr = std::get_if<GprNode>(node)) {
+ } else if (const auto gpr = std::get_if<GprNode>(&*node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
return Constant(t_float, 0.0f);
}
return Emit(OpLoad(t_float, registers.at(index)));
- } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+ } else if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue()));
- } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
+ } else if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
const auto value = [&]() -> Id {
switch (const auto index = predicate->GetIndex(); index) {
case Tegra::Shader::Pred::UnusedIndex:
@@ -515,7 +521,7 @@ private:
}
return value;
- } else if (const auto abuf = std::get_if<AbufNode>(node)) {
+ } else if (const auto abuf = std::get_if<AbufNode>(&*node)) {
const auto attribute = abuf->GetIndex();
const auto element = abuf->GetElement();
@@ -565,40 +571,42 @@ private:
}
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
- } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
- const Node offset = cbuf->GetOffset();
+ } else if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
+ const Node& offset = cbuf->GetOffset();
const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
- Id buffer_index{};
- Id buffer_element{};
-
- if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
- // Direct access
- const u32 offset_imm = immediate->GetValue();
- ASSERT(offset_imm % 4 == 0);
- buffer_index = Constant(t_uint, offset_imm / 16);
- buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
-
- } else if (std::holds_alternative<OperationNode>(*offset)) {
- // Indirect access
- // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
- // emits sub-optimal code on GLSL from my testing).
- const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
- const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
- const Id final_offset = Emit(
- OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
- buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
- buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
-
+ Id pointer{};
+ if (device.IsExtScalarBlockLayoutSupported()) {
+ const Id buffer_offset = Emit(OpShiftRightLogical(
+ t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
+ pointer = Emit(
+ OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset));
} else {
- UNREACHABLE_MSG("Unmanaged offset node type");
+ Id buffer_index{};
+ Id buffer_element{};
+ if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
+ // Direct access
+ const u32 offset_imm = immediate->GetValue();
+ ASSERT(offset_imm % 4 == 0);
+ buffer_index = Constant(t_uint, offset_imm / 16);
+ buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
+ } else if (std::holds_alternative<OperationNode>(*offset)) {
+ // Indirect access
+ const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
+ const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
+ const Id final_offset = Emit(OpUMod(
+ t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
+ buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
+ buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
+ } else {
+ UNREACHABLE_MSG("Unmanaged offset node type");
+ }
+ pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
+ buffer_index, buffer_element));
}
-
- const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
- buffer_index, buffer_element));
return Emit(OpLoad(t_float, pointer));
- } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+ } else if (const auto gmem = std::get_if<GmemNode>(&*node)) {
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress()));
const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress()));
@@ -608,11 +616,13 @@ private:
return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer,
Constant(t_uint, 0u), offset))));
- } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+ } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
const Id true_label = OpLabel();
const Id skip_label = OpLabel();
- Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label));
+ const Id condition = Visit(conditional->GetCondition());
+ Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone));
+ Emit(OpBranchConditional(condition, true_label, skip_label));
Emit(true_label);
VisitBasicBlock(conditional->GetCode());
@@ -621,7 +631,7 @@ private:
Emit(skip_label);
return {};
- } else if (const auto comment = std::get_if<CommentNode>(node)) {
+ } else if (const auto comment = std::get_if<CommentNode>(&*node)) {
Name(Emit(OpUndef(t_void)), comment->GetText());
return {};
}
@@ -689,18 +699,18 @@ private:
}
Id Assign(Operation operation) {
- const Node dest = operation[0];
- const Node src = operation[1];
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
Id target{};
- if (const auto gpr = std::get_if<GprNode>(dest)) {
+ if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
// Writing to Register::ZeroIndex is a no op
return {};
}
target = registers.at(gpr->GetIndex());
- } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+ } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
target = [&]() -> Id {
switch (const auto attribute = abuf->GetIndex(); attribute) {
case Attribute::Index::Position:
@@ -725,7 +735,7 @@ private:
}
}();
- } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
+ } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress()));
address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4)));
target = Emit(OpAccessChain(t_prv_float, local_memory, {address}));
@@ -771,11 +781,11 @@ private:
}
Id LogicalAssign(Operation operation) {
- const Node dest = operation[0];
- const Node src = operation[1];
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
Id target{};
- if (const auto pred = std::get_if<PredicateNode>(dest)) {
+ if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
const auto index = pred->GetIndex();
@@ -787,7 +797,7 @@ private:
}
target = predicates.at(index);
- } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
+ } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
}
@@ -873,7 +883,7 @@ private:
} else {
u32 component_value = 0;
if (meta->component) {
- const auto component = std::get_if<ImmediateNode>(meta->component);
+ const auto component = std::get_if<ImmediateNode>(&*meta->component);
ASSERT_MSG(component, "Component is not an immediate value");
component_value = component->GetValue();
}
@@ -930,7 +940,7 @@ private:
}
Id Branch(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(operation[0]);
+ const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue())));
@@ -939,7 +949,7 @@ private:
}
Id PushFlowStack(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(operation[0]);
+ const auto target = std::get_if<ImmediateNode>(&*operation[0]);
ASSERT(target);
const Id current = Emit(OpLoad(t_uint, flow_stack_top));
@@ -968,11 +978,11 @@ private:
case ShaderStage::Vertex: {
// TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
// seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
- const Id position = AccessElement(t_float4, per_vertex, position_index);
- Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2)));
+ const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u);
+ Id depth = Emit(OpLoad(t_float, z_pointer));
depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
- Emit(OpStore(AccessElement(t_out_float, position, 2), depth));
+ Emit(OpStore(z_pointer, depth));
break;
}
case ShaderStage::Fragment: {
@@ -1311,6 +1321,7 @@ private:
&SPIRVDecompiler::WorkGroupId<2>,
};
+ const VKDevice& device;
const ShaderIR& ir;
const ShaderStage stage;
const Tegra::Shader::Header header;
@@ -1349,12 +1360,18 @@ private:
const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
- const Id t_cbuf_array =
- Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"),
- spv::Decoration::ArrayStride, CBUF_STRIDE);
- const Id t_cbuf_struct = MemberDecorate(
- Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
- const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct);
+ const Id t_cbuf_std140 = Decorate(
+ Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"),
+ spv::Decoration::ArrayStride, 16u);
+ const Id t_cbuf_scalar = Decorate(
+ Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"),
+ spv::Decoration::ArrayStride, 4u);
+ const Id t_cbuf_std140_struct = MemberDecorate(
+ Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+ const Id t_cbuf_scalar_struct = MemberDecorate(
+ Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+ const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
+ const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
const Id t_gmem_array =
@@ -1403,8 +1420,9 @@ private:
std::map<u32, Id> labels;
};
-DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) {
- auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage);
+DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+ Maxwell::ShaderStage stage) {
+ auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
decompiler->Decompile();
return {std::move(decompiler), decompiler->GetShaderEntries()};
}
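For reference, the constant buffer addressing that the two code paths above emit boils down to the following arithmetic. This is a minimal standalone sketch under the assumptions visible in the diff (4-byte elements, 16-byte std140 vectors); the function and struct names are illustrative only, not part of the decompiler:

    #include <cstdint>

    // With VK_EXT_scalar_block_layout the UBO is a tightly packed float array
    // (ArrayStride 4), so a byte offset maps to a single array index.
    std::uint32_t ScalarCbufIndex(std::uint32_t byte_offset) {
        return byte_offset >> 2; // same shift the OpShiftRightLogical above performs
    }

    // Without the extension the UBO keeps a std140 vec4 array (ArrayStride 16),
    // so the same byte offset splits into a vec4 index plus a component.
    struct Std140CbufIndex {
        std::uint32_t vector;  // index into the vec4 array
        std::uint32_t element; // component within that vec4
    };

    Std140CbufIndex Std140CbufIndexFromOffset(std::uint32_t byte_offset) {
        return {byte_offset / 16, (byte_offset / 4) % 4};
    }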
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 329d8fa38..f90541cc1 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -20,10 +20,13 @@ namespace VideoCommon::Shader {
class ShaderIR;
}
+namespace Vulkan {
+class VKDevice;
+}
+
namespace Vulkan::VKShader {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
using SamplerEntry = VideoCommon::Shader::Sampler;
constexpr u32 DESCRIPTOR_SET = 0;
@@ -75,6 +78,7 @@ struct ShaderEntries {
using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
-DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage);
+DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+ Maxwell::ShaderStage stage);
} // namespace Vulkan::VKShader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2da595c0d..a0554c97e 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index b4859bc1e..87d8fecaa 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -6,6 +6,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 3a29c4a46..b06cbe441 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -6,6 +6,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 5341e460f..7bcf38f23 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -6,6 +6,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 3095f2fd4..f1875967c 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 9fd4b273e..c8c1a7f40 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 679ac0d4e..73880db0e 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index 1ae192c6a..e02bcd097 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index 0b12a0d08..8be1119df 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index b5ec9a6f5..4221f0c58 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index a1d04c6e5..29be25ca3 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index cc522f1de..f5013e44a 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 9d2322a1d..2323052b0 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 755f2ec44..48ca7a4af 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -8,6 +8,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index fba44d714..d59d15bd8 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index a425f9eb7..c3bcf1ae9 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index a4cdaf74d..46e3d5905 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -4,6 +4,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index a6a1fb632..dd20775d7 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index e6a010a7d..80fc0ccfc 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -169,7 +170,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
- const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+ const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
SetTemporal(bb, i, gmem);
}
@@ -262,7 +263,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
- const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+ const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
}
@@ -298,9 +299,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
const Node base_address{
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
- const auto cbuf = std::get_if<CbufNode>(base_address);
+ const auto cbuf = std::get_if<CbufNode>(&*base_address);
ASSERT(cbuf != nullptr);
- const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+ const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue();
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index a6c123573..6fc07f213 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -6,6 +6,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
index 71844c42b..9290d22eb 100644
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index 387491bd3..febbfeb50 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index f8659e48e..e6c9d287e 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 44ae87ece..2ac16eeb0 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 5b033126d..4a356dbd4 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -291,8 +292,8 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
const Node sampler_register = GetRegister(reg);
const Node base_sampler =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
- const auto cbuf = std::get_if<CbufNode>(base_sampler);
- const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+ const auto cbuf = std::get_if<CbufNode>(&*base_sampler);
+ const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue();
const auto cbuf_index = cbuf->GetIndex();
@@ -388,8 +389,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node array, Node depth_compare, u32 bias_offset,
std::vector<Node> aoffi,
std::optional<Tegra::Shader::Register> bindless_reg) {
- const bool is_array = array;
- const bool is_shadow = depth_compare;
+ const auto is_array = static_cast<bool>(array);
+ const auto is_shadow = static_cast<bool>(depth_compare);
const bool is_bindless = bindless_reg.has_value();
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index cb9ab72b1..97fc6f9b1 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 04a776398..93dee77d1 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
new file mode 100644
index 000000000..c002f90f9
--- /dev/null
+++ b/src/video_core/shader/node.h
@@ -0,0 +1,514 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+enum class OperationCode {
+ Assign, /// (float& dest, float src) -> void
+
+ Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
+
+ FAdd, /// (MetaArithmetic, float a, float b) -> float
+ FMul, /// (MetaArithmetic, float a, float b) -> float
+ FDiv, /// (MetaArithmetic, float a, float b) -> float
+ FFma, /// (MetaArithmetic, float a, float b, float c) -> float
+ FNegate, /// (MetaArithmetic, float a) -> float
+ FAbsolute, /// (MetaArithmetic, float a) -> float
+ FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
+ FMin, /// (MetaArithmetic, float a, float b) -> float
+ FMax, /// (MetaArithmetic, float a, float b) -> float
+ FCos, /// (MetaArithmetic, float a) -> float
+ FSin, /// (MetaArithmetic, float a) -> float
+ FExp2, /// (MetaArithmetic, float a) -> float
+ FLog2, /// (MetaArithmetic, float a) -> float
+ FInverseSqrt, /// (MetaArithmetic, float a) -> float
+ FSqrt, /// (MetaArithmetic, float a) -> float
+ FRoundEven, /// (MetaArithmetic, float a) -> float
+ FFloor, /// (MetaArithmetic, float a) -> float
+ FCeil, /// (MetaArithmetic, float a) -> float
+ FTrunc, /// (MetaArithmetic, float a) -> float
+ FCastInteger, /// (MetaArithmetic, int a) -> float
+ FCastUInteger, /// (MetaArithmetic, uint a) -> float
+
+ IAdd, /// (MetaArithmetic, int a, int b) -> int
+ IMul, /// (MetaArithmetic, int a, int b) -> int
+ IDiv, /// (MetaArithmetic, int a, int b) -> int
+ INegate, /// (MetaArithmetic, int a) -> int
+ IAbsolute, /// (MetaArithmetic, int a) -> int
+ IMin, /// (MetaArithmetic, int a, int b) -> int
+ IMax, /// (MetaArithmetic, int a, int b) -> int
+ ICastFloat, /// (MetaArithmetic, float a) -> int
+ ICastUnsigned, /// (MetaArithmetic, uint a) -> int
+ ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
+ ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
+ IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
+ IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
+ IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
+ IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
+ IBitwiseNot, /// (MetaArithmetic, int a) -> int
+ IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
+    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
+ IBitCount, /// (MetaArithmetic, int) -> int
+
+ UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
+ UMul, /// (MetaArithmetic, uint a, uint b) -> uint
+ UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
+ UMin, /// (MetaArithmetic, uint a, uint b) -> uint
+ UMax, /// (MetaArithmetic, uint a, uint b) -> uint
+ UCastFloat, /// (MetaArithmetic, float a) -> uint
+ UCastSigned, /// (MetaArithmetic, int a) -> uint
+ ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
+ ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
+ UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
+ UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
+ UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
+ UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
+ UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
+ UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
+    UBitfieldExtract,      /// (MetaArithmetic, uint value, int offset, int bits) -> uint
+ UBitCount, /// (MetaArithmetic, uint) -> uint
+
+ HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+ HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+ HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
+ HAbsolute, /// (f16vec2 a) -> f16vec2
+ HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
+ HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
+ HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
+ HMergeF32, /// (f16vec2 src) -> float
+ HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
+ HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
+ HPack2, /// (float a, float b) -> f16vec2
+
+ LogicalAssign, /// (bool& dst, bool src) -> void
+ LogicalAnd, /// (bool a, bool b) -> bool
+ LogicalOr, /// (bool a, bool b) -> bool
+ LogicalXor, /// (bool a, bool b) -> bool
+ LogicalNegate, /// (bool a) -> bool
+ LogicalPick2, /// (bool2 pair, uint index) -> bool
+ LogicalAll2, /// (bool2 a) -> bool
+ LogicalAny2, /// (bool2 a) -> bool
+
+ LogicalFLessThan, /// (float a, float b) -> bool
+ LogicalFEqual, /// (float a, float b) -> bool
+ LogicalFLessEqual, /// (float a, float b) -> bool
+ LogicalFGreaterThan, /// (float a, float b) -> bool
+ LogicalFNotEqual, /// (float a, float b) -> bool
+ LogicalFGreaterEqual, /// (float a, float b) -> bool
+ LogicalFIsNan, /// (float a) -> bool
+
+ LogicalILessThan, /// (int a, int b) -> bool
+ LogicalIEqual, /// (int a, int b) -> bool
+ LogicalILessEqual, /// (int a, int b) -> bool
+ LogicalIGreaterThan, /// (int a, int b) -> bool
+ LogicalINotEqual, /// (int a, int b) -> bool
+ LogicalIGreaterEqual, /// (int a, int b) -> bool
+
+ LogicalULessThan, /// (uint a, uint b) -> bool
+ LogicalUEqual, /// (uint a, uint b) -> bool
+ LogicalULessEqual, /// (uint a, uint b) -> bool
+ LogicalUGreaterThan, /// (uint a, uint b) -> bool
+ LogicalUNotEqual, /// (uint a, uint b) -> bool
+ LogicalUGreaterEqual, /// (uint a, uint b) -> bool
+
+ Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+ Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+
+ Texture, /// (MetaTexture, float[N] coords) -> float4
+ TextureLod, /// (MetaTexture, float[N] coords) -> float4
+ TextureGather, /// (MetaTexture, float[N] coords) -> float4
+ TextureQueryDimensions, /// (MetaTexture, float a) -> float4
+ TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
+ TexelFetch, /// (MetaTexture, int[N], int) -> float4
+
+ Branch, /// (uint branch_target) -> void
+ PushFlowStack, /// (uint branch_target) -> void
+ PopFlowStack, /// () -> void
+ Exit, /// () -> void
+ Discard, /// () -> void
+
+ EmitVertex, /// () -> void
+ EndPrimitive, /// () -> void
+
+ YNegate, /// () -> float
+ LocalInvocationIdX, /// () -> uint
+ LocalInvocationIdY, /// () -> uint
+ LocalInvocationIdZ, /// () -> uint
+ WorkGroupIdX, /// () -> uint
+ WorkGroupIdY, /// () -> uint
+ WorkGroupIdZ, /// () -> uint
+
+ Amount,
+};
+
+enum class InternalFlag {
+ Zero = 0,
+ Sign = 1,
+ Carry = 2,
+ Overflow = 3,
+ Amount = 4,
+};
+
+class OperationNode;
+class ConditionalNode;
+class GprNode;
+class ImmediateNode;
+class InternalFlagNode;
+class PredicateNode;
+class AbufNode;
+class CbufNode;
+class LmemNode;
+class GmemNode;
+class CommentNode;
+
+using NodeData =
+ std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
+ PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
+using Node = std::shared_ptr<NodeData>;
+using Node4 = std::array<Node, 4>;
+using NodeBlock = std::vector<Node>;
+
+class Sampler {
+public:
+ /// This constructor is for bound samplers
+ explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+ is_bindless{false} {}
+
+ /// This constructor is for bindless samplers
+ explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+ Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
+ : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
+ is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
+
+ /// This constructor is for serialization/deserialization
+ explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow, bool is_bindless)
+ : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+ is_bindless{is_bindless} {}
+
+ std::size_t GetOffset() const {
+ return offset;
+ }
+
+ std::size_t GetIndex() const {
+ return index;
+ }
+
+ Tegra::Shader::TextureType GetType() const {
+ return type;
+ }
+
+ bool IsArray() const {
+ return is_array;
+ }
+
+ bool IsShadow() const {
+ return is_shadow;
+ }
+
+ bool IsBindless() const {
+ return is_bindless;
+ }
+
+ std::pair<u32, u32> GetBindlessCBuf() const {
+ return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
+ }
+
+ bool operator<(const Sampler& rhs) const {
+ return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
+ std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
+ rhs.is_bindless);
+ }
+
+private:
+ /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
+ /// instruction.
+ std::size_t offset{};
+ std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+ Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
+ bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
+ bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
+};
+
+struct GlobalMemoryBase {
+ u32 cbuf_index{};
+ u32 cbuf_offset{};
+
+ bool operator<(const GlobalMemoryBase& rhs) const {
+ return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
+ }
+};
+
+/// Parameters describing an arithmetic operation
+struct MetaArithmetic {
+    bool precise{}; ///< Whether the operation must be kept precise (it cannot be optimized away)
+};
+
+/// Parameters describing a texture sampler
+struct MetaTexture {
+ const Sampler& sampler;
+ Node array;
+ Node depth_compare;
+ std::vector<Node> aoffi;
+ Node bias;
+ Node lod;
+ Node component{};
+ u32 element{};
+};
+
+/// Parameters that modify an operation but are not part of any particular operand
+using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
+
+/// Holds any kind of operation that can be done in the IR
+class OperationNode final {
+public:
+ explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
+
+ explicit OperationNode(OperationCode code, Meta meta)
+ : OperationNode(code, meta, std::vector<Node>{}) {}
+
+ explicit OperationNode(OperationCode code, std::vector<Node> operands)
+ : OperationNode(code, Meta{}, std::move(operands)) {}
+
+ explicit OperationNode(OperationCode code, Meta meta, std::vector<Node> operands)
+ : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {}
+
+ template <typename... Args>
+ explicit OperationNode(OperationCode code, Meta meta, Args&&... operands)
+ : code{code}, meta{std::move(meta)}, operands{operands...} {}
+
+ OperationCode GetCode() const {
+ return code;
+ }
+
+ const Meta& GetMeta() const {
+ return meta;
+ }
+
+ std::size_t GetOperandsCount() const {
+ return operands.size();
+ }
+
+ const Node& operator[](std::size_t operand_index) const {
+ return operands.at(operand_index);
+ }
+
+private:
+ OperationCode code{};
+ Meta meta{};
+ std::vector<Node> operands;
+};
+
+/// Conditionally executed code, guarded by a node that returns a boolean
+class ConditionalNode final {
+public:
+ explicit ConditionalNode(Node condition, std::vector<Node>&& code)
+ : condition{std::move(condition)}, code{std::move(code)} {}
+
+ const Node& GetCondition() const {
+ return condition;
+ }
+
+ const std::vector<Node>& GetCode() const {
+ return code;
+ }
+
+private:
+ Node condition; ///< Condition to be satisfied
+ std::vector<Node> code; ///< Code to execute
+};
+
+/// A general purpose register
+class GprNode final {
+public:
+ explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}
+
+ u32 GetIndex() const {
+ return static_cast<u32>(index);
+ }
+
+private:
+ Tegra::Shader::Register index{};
+};
+
+/// A 32-bit value that represents an immediate value
+class ImmediateNode final {
+public:
+ explicit constexpr ImmediateNode(u32 value) : value{value} {}
+
+ u32 GetValue() const {
+ return value;
+ }
+
+private:
+ u32 value{};
+};
+
+/// One of Maxwell's internal flags
+class InternalFlagNode final {
+public:
+ explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}
+
+ InternalFlag GetFlag() const {
+ return flag;
+ }
+
+private:
+ InternalFlag flag{};
+};
+
+/// A predicate register; it can be negated without additional nodes
+class PredicateNode final {
+public:
+ explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
+ : index{index}, negated{negated} {}
+
+ Tegra::Shader::Pred GetIndex() const {
+ return index;
+ }
+
+ bool IsNegated() const {
+ return negated;
+ }
+
+private:
+ Tegra::Shader::Pred index{};
+ bool negated{};
+};
+
+/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
+class AbufNode final {
+public:
+ // Initialize for standard attributes (index is explicit).
+ explicit AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {})
+ : buffer{std::move(buffer)}, index{index}, element{element} {}
+
+ // Initialize for physical attributes (index is a variable value).
+ explicit AbufNode(Node physical_address, Node buffer = {})
+ : physical_address{std::move(physical_address)}, buffer{std::move(buffer)} {}
+
+ Tegra::Shader::Attribute::Index GetIndex() const {
+ return index;
+ }
+
+ u32 GetElement() const {
+ return element;
+ }
+
+ const Node& GetBuffer() const {
+ return buffer;
+ }
+
+ bool IsPhysicalBuffer() const {
+ return static_cast<bool>(physical_address);
+ }
+
+ const Node& GetPhysicalAddress() const {
+ return physical_address;
+ }
+
+private:
+ Node physical_address;
+ Node buffer;
+ Tegra::Shader::Attribute::Index index{};
+ u32 element{};
+};
+
+/// Constant buffer node, usually mapped to uniform buffers in GLSL
+class CbufNode final {
+public:
+ explicit CbufNode(u32 index, Node offset) : index{index}, offset{std::move(offset)} {}
+
+ u32 GetIndex() const {
+ return index;
+ }
+
+ const Node& GetOffset() const {
+ return offset;
+ }
+
+private:
+ u32 index{};
+ Node offset;
+};
+
+/// Local memory node
+class LmemNode final {
+public:
+ explicit LmemNode(Node address) : address{std::move(address)} {}
+
+ const Node& GetAddress() const {
+ return address;
+ }
+
+private:
+ Node address;
+};
+
+/// Global memory node
+class GmemNode final {
+public:
+ explicit GmemNode(Node real_address, Node base_address, const GlobalMemoryBase& descriptor)
+ : real_address{std::move(real_address)}, base_address{std::move(base_address)},
+ descriptor{descriptor} {}
+
+ const Node& GetRealAddress() const {
+ return real_address;
+ }
+
+ const Node& GetBaseAddress() const {
+ return base_address;
+ }
+
+ const GlobalMemoryBase& GetDescriptor() const {
+ return descriptor;
+ }
+
+private:
+ Node real_address;
+ Node base_address;
+ GlobalMemoryBase descriptor;
+};
+
+/// Commentary, can be dropped
+class CommentNode final {
+public:
+ explicit CommentNode(std::string text) : text{std::move(text)} {}
+
+ const std::string& GetText() const {
+ return text;
+ }
+
+private:
+ std::string text;
+};
+
+} // namespace VideoCommon::Shader
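Because Node is a std::shared_ptr<NodeData> (where NodeData is a std::variant), visitors inspect the held alternative through the pointer, which is why the decompiler changes above switch to the std::get_if<T>(&*node) form. A minimal sketch of that pattern, assuming only the declarations in this header (the helper name is illustrative):

    #include <variant>
    #include "video_core/shader/node.h"

    namespace VideoCommon::Shader {

    // Returns the value of an immediate node, or zero for any other node type.
    inline u32 ImmediateValueOrZero(const Node& node) {
        // &*node produces a NodeData*, which std::get_if expects; it returns
        // nullptr when the variant holds a different alternative.
        if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
            return immediate->GetValue();
        }
        return 0;
    }

    } // namespace VideoCommon::Shader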
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
new file mode 100644
index 000000000..6fccbbba3
--- /dev/null
+++ b/src/video_core/shader/node_helper.cpp
@@ -0,0 +1,99 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+Node Conditional(Node condition, std::vector<Node> code) {
+ return MakeNode<ConditionalNode>(condition, std::move(code));
+}
+
+Node Comment(std::string text) {
+ return MakeNode<CommentNode>(std::move(text));
+}
+
+Node Immediate(u32 value) {
+ return MakeNode<ImmediateNode>(value);
+}
+
+Node Immediate(s32 value) {
+ return Immediate(static_cast<u32>(value));
+}
+
+Node Immediate(f32 value) {
+ u32 integral;
+ std::memcpy(&integral, &value, sizeof(u32));
+ return Immediate(integral);
+}
+
+OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
+ if (is_signed) {
+ return operation_code;
+ }
+ switch (operation_code) {
+ case OperationCode::FCastInteger:
+ return OperationCode::FCastUInteger;
+ case OperationCode::IAdd:
+ return OperationCode::UAdd;
+ case OperationCode::IMul:
+ return OperationCode::UMul;
+ case OperationCode::IDiv:
+ return OperationCode::UDiv;
+ case OperationCode::IMin:
+ return OperationCode::UMin;
+ case OperationCode::IMax:
+ return OperationCode::UMax;
+ case OperationCode::ICastFloat:
+ return OperationCode::UCastFloat;
+ case OperationCode::ICastUnsigned:
+ return OperationCode::UCastSigned;
+ case OperationCode::ILogicalShiftLeft:
+ return OperationCode::ULogicalShiftLeft;
+ case OperationCode::ILogicalShiftRight:
+ return OperationCode::ULogicalShiftRight;
+ case OperationCode::IArithmeticShiftRight:
+ return OperationCode::UArithmeticShiftRight;
+ case OperationCode::IBitwiseAnd:
+ return OperationCode::UBitwiseAnd;
+ case OperationCode::IBitwiseOr:
+ return OperationCode::UBitwiseOr;
+ case OperationCode::IBitwiseXor:
+ return OperationCode::UBitwiseXor;
+ case OperationCode::IBitwiseNot:
+ return OperationCode::UBitwiseNot;
+ case OperationCode::IBitfieldInsert:
+ return OperationCode::UBitfieldInsert;
+ case OperationCode::IBitCount:
+ return OperationCode::UBitCount;
+ case OperationCode::LogicalILessThan:
+ return OperationCode::LogicalULessThan;
+ case OperationCode::LogicalIEqual:
+ return OperationCode::LogicalUEqual;
+ case OperationCode::LogicalILessEqual:
+ return OperationCode::LogicalULessEqual;
+ case OperationCode::LogicalIGreaterThan:
+ return OperationCode::LogicalUGreaterThan;
+ case OperationCode::LogicalINotEqual:
+ return OperationCode::LogicalUNotEqual;
+ case OperationCode::LogicalIGreaterEqual:
+ return OperationCode::LogicalUGreaterEqual;
+ case OperationCode::INegate:
+ UNREACHABLE_MSG("Can't negate an unsigned integer");
+ return {};
+ case OperationCode::IAbsolute:
+ UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
+ return {};
+ default:
+ UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
+ return {};
+ }
+}
+
+} // namespace VideoCommon::Shader
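A short usage sketch of SignedToUnsignedCode as defined above: when is_signed is true the code passes through unchanged, otherwise it maps to its unsigned counterpart. The wrapper function below is illustrative only:

    #include <cassert>
    #include "video_core/shader/node_helper.h"

    void SignedToUnsignedCodeExample() {
        using namespace VideoCommon::Shader;

        // Signed requests are returned unchanged.
        assert(SignedToUnsignedCode(OperationCode::IAdd, true) == OperationCode::IAdd);
        // Unsigned requests map to the unsigned counterpart.
        assert(SignedToUnsignedCode(OperationCode::IAdd, false) == OperationCode::UAdd);
        assert(SignedToUnsignedCode(OperationCode::LogicalILessThan, false) ==
               OperationCode::LogicalULessThan);
    }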
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
new file mode 100644
index 000000000..0c2aa749b
--- /dev/null
+++ b/src/video_core/shader/node_helper.h
@@ -0,0 +1,65 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/shader/node.h"
+
+namespace VideoCommon::Shader {
+
+/// This arithmetic operation must stay precise and cannot be optimized away
+inline constexpr MetaArithmetic PRECISE = {true};
+/// This arithmetic operation can be optimized away
+inline constexpr MetaArithmetic NO_PRECISE = {false};
+
+/// Creates a conditional node
+Node Conditional(Node condition, std::vector<Node> code);
+
+/// Creates a commentary node
+Node Comment(std::string text);
+
+/// Creates a u32 immediate
+Node Immediate(u32 value);
+
+/// Creates a s32 immediate
+Node Immediate(s32 value);
+
+/// Creates a f32 immediate
+Node Immediate(f32 value);
+
+/// Converts a signed operation code to its unsigned counterpart
+OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
+
+template <typename T, typename... Args>
+Node MakeNode(Args&&... args) {
+ static_assert(std::is_convertible_v<T, NodeData>);
+ return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
+}
+
+template <typename... Args>
+Node Operation(OperationCode code, Args&&... args) {
+ if constexpr (sizeof...(args) == 0) {
+ return MakeNode<OperationNode>(code);
+ } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
+ Meta>) {
+ return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
+ } else {
+ return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
+ }
+}
+
+template <typename... Args>
+Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
+ return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
+}
+
+} // namespace VideoCommon::Shader
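As a usage sketch of the helpers above: Operation() forwards a leading argument as the operation's Meta when it is convertible to Meta and injects an empty Meta{} otherwise, while MakeNode constructs the NodeData variant in place and wraps it in a shared pointer. The function names below are illustrative:

    #include "video_core/shader/node_helper.h"

    namespace VideoCommon::Shader {

    // First argument after the code is convertible to Meta, so it becomes the meta.
    inline Node PreciseFMul(Node a, Node b) {
        return Operation(OperationCode::FMul, PRECISE, a, b);
    }

    // No Meta argument given: Operation() supplies an empty Meta{} before the operands.
    inline Node PlainUAdd(Node a, Node b) {
        return Operation(OperationCode::UAdd, a, b);
    }

    // MakeNode builds the variant alternative in place and returns a shared Node.
    inline Node ConstBufferFloat(u32 index, u32 offset) {
        return MakeNode<CbufNode>(index, Immediate(offset));
    }

    } // namespace VideoCommon::Shader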
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 8a6ee5cf5..11b545cca 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -28,30 +29,11 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
ShaderIR::~ShaderIR() = default;
-Node ShaderIR::StoreNode(NodeData&& node_data) {
- auto store = std::make_unique<NodeData>(node_data);
- const Node node = store.get();
- stored_nodes.push_back(std::move(store));
- return node;
-}
-
-Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
- return StoreNode(ConditionalNode(condition, std::move(code)));
-}
-
-Node ShaderIR::Comment(std::string text) {
- return StoreNode(CommentNode(std::move(text)));
-}
-
-Node ShaderIR::Immediate(u32 value) {
- return StoreNode(ImmediateNode(value));
-}
-
Node ShaderIR::GetRegister(Register reg) {
if (reg != Register::ZeroIndex) {
used_registers.insert(static_cast<u32>(reg));
}
- return StoreNode(GprNode(reg));
+ return MakeNode<GprNode>(reg);
}
Node ShaderIR::GetImmediate19(Instruction instr) {
@@ -69,7 +51,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
const auto [entry, is_new] = used_cbufs.try_emplace(index);
entry->second.MarkAsUsed(offset);
- return StoreNode(CbufNode(index, Immediate(offset)));
+ return MakeNode<CbufNode>(index, Immediate(offset));
}
Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
@@ -80,7 +62,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
entry->second.MarkAsUsedIndirect();
const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
- return StoreNode(CbufNode(index, final_offset));
+ return MakeNode<CbufNode>(index, final_offset);
}
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
@@ -89,7 +71,7 @@ Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
used_predicates.insert(pred);
}
- return StoreNode(PredicateNode(pred, negated));
+ return MakeNode<PredicateNode>(pred, negated);
}
Node ShaderIR::GetPredicate(bool immediate) {
@@ -98,12 +80,12 @@ Node ShaderIR::GetPredicate(bool immediate) {
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
used_input_attributes.emplace(index);
- return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
+ return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
}
Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
uses_physical_attributes = true;
- return StoreNode(AbufNode(GetRegister(physical_address), buffer));
+ return MakeNode<AbufNode>(GetRegister(physical_address), buffer);
}
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
@@ -115,11 +97,11 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
}
used_output_attributes.insert(index);
- return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
+ return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
}
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
- const Node node = StoreNode(InternalFlagNode(flag));
+ const Node node = MakeNode<InternalFlagNode>(flag);
if (negated) {
return Operation(OperationCode::LogicalNegate, node);
}
@@ -127,7 +109,7 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
}
Node ShaderIR::GetLocalMemory(Node address) {
- return StoreNode(LmemNode(address));
+ return MakeNode<LmemNode>(address);
}
Node ShaderIR::GetTemporal(u32 id) {
@@ -393,68 +375,4 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
Immediate(bits));
}
-/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
- bool is_signed) {
- if (is_signed) {
- return operation_code;
- }
- switch (operation_code) {
- case OperationCode::FCastInteger:
- return OperationCode::FCastUInteger;
- case OperationCode::IAdd:
- return OperationCode::UAdd;
- case OperationCode::IMul:
- return OperationCode::UMul;
- case OperationCode::IDiv:
- return OperationCode::UDiv;
- case OperationCode::IMin:
- return OperationCode::UMin;
- case OperationCode::IMax:
- return OperationCode::UMax;
- case OperationCode::ICastFloat:
- return OperationCode::UCastFloat;
- case OperationCode::ICastUnsigned:
- return OperationCode::UCastSigned;
- case OperationCode::ILogicalShiftLeft:
- return OperationCode::ULogicalShiftLeft;
- case OperationCode::ILogicalShiftRight:
- return OperationCode::ULogicalShiftRight;
- case OperationCode::IArithmeticShiftRight:
- return OperationCode::UArithmeticShiftRight;
- case OperationCode::IBitwiseAnd:
- return OperationCode::UBitwiseAnd;
- case OperationCode::IBitwiseOr:
- return OperationCode::UBitwiseOr;
- case OperationCode::IBitwiseXor:
- return OperationCode::UBitwiseXor;
- case OperationCode::IBitwiseNot:
- return OperationCode::UBitwiseNot;
- case OperationCode::IBitfieldInsert:
- return OperationCode::UBitfieldInsert;
- case OperationCode::IBitCount:
- return OperationCode::UBitCount;
- case OperationCode::LogicalILessThan:
- return OperationCode::LogicalULessThan;
- case OperationCode::LogicalIEqual:
- return OperationCode::LogicalUEqual;
- case OperationCode::LogicalILessEqual:
- return OperationCode::LogicalULessEqual;
- case OperationCode::LogicalIGreaterThan:
- return OperationCode::LogicalUGreaterThan;
- case OperationCode::LogicalINotEqual:
- return OperationCode::LogicalUNotEqual;
- case OperationCode::LogicalIGreaterEqual:
- return OperationCode::LogicalUGreaterEqual;
- case OperationCode::INegate:
- UNREACHABLE_MSG("Can't negate an unsigned integer");
- return {};
- case OperationCode::IAbsolute:
- UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
- return {};
- default:
- UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
- return {};
- }
-}
-
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ff7472e30..edcf2288e 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,188 +18,14 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
+#include "video_core/shader/node.h"
namespace VideoCommon::Shader {
-class OperationNode;
-class ConditionalNode;
-class GprNode;
-class ImmediateNode;
-class InternalFlagNode;
-class PredicateNode;
-class AbufNode; ///< Attribute buffer
-class CbufNode; ///< Constant buffer
-class LmemNode; ///< Local memory
-class GmemNode; ///< Global memory
-class CommentNode;
-
using ProgramCode = std::vector<u64>;
-using NodeData =
- std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
- PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
-using Node = const NodeData*;
-using Node4 = std::array<Node, 4>;
-using NodeBlock = std::vector<Node>;
-
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
-enum class OperationCode {
- Assign, /// (float& dest, float src) -> void
-
- Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
-
- FAdd, /// (MetaArithmetic, float a, float b) -> float
- FMul, /// (MetaArithmetic, float a, float b) -> float
- FDiv, /// (MetaArithmetic, float a, float b) -> float
- FFma, /// (MetaArithmetic, float a, float b, float c) -> float
- FNegate, /// (MetaArithmetic, float a) -> float
- FAbsolute, /// (MetaArithmetic, float a) -> float
- FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
- FMin, /// (MetaArithmetic, float a, float b) -> float
- FMax, /// (MetaArithmetic, float a, float b) -> float
- FCos, /// (MetaArithmetic, float a) -> float
- FSin, /// (MetaArithmetic, float a) -> float
- FExp2, /// (MetaArithmetic, float a) -> float
- FLog2, /// (MetaArithmetic, float a) -> float
- FInverseSqrt, /// (MetaArithmetic, float a) -> float
- FSqrt, /// (MetaArithmetic, float a) -> float
- FRoundEven, /// (MetaArithmetic, float a) -> float
- FFloor, /// (MetaArithmetic, float a) -> float
- FCeil, /// (MetaArithmetic, float a) -> float
- FTrunc, /// (MetaArithmetic, float a) -> float
- FCastInteger, /// (MetaArithmetic, int a) -> float
- FCastUInteger, /// (MetaArithmetic, uint a) -> float
-
- IAdd, /// (MetaArithmetic, int a, int b) -> int
- IMul, /// (MetaArithmetic, int a, int b) -> int
- IDiv, /// (MetaArithmetic, int a, int b) -> int
- INegate, /// (MetaArithmetic, int a) -> int
- IAbsolute, /// (MetaArithmetic, int a) -> int
- IMin, /// (MetaArithmetic, int a, int b) -> int
- IMax, /// (MetaArithmetic, int a, int b) -> int
- ICastFloat, /// (MetaArithmetic, float a) -> int
- ICastUnsigned, /// (MetaArithmetic, uint a) -> int
- ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
- ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
- IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
- IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseNot, /// (MetaArithmetic, int a) -> int
- IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
- IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int
- IBitCount, /// (MetaArithmetic, int) -> int
-
- UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
- UMul, /// (MetaArithmetic, uint a, uint b) -> uint
- UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
- UMin, /// (MetaArithmetic, uint a, uint b) -> uint
- UMax, /// (MetaArithmetic, uint a, uint b) -> uint
- UCastFloat, /// (MetaArithmetic, float a) -> uint
- UCastSigned, /// (MetaArithmetic, int a) -> uint
- ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
- ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
- UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
- UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
- UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
- UBitCount, /// (MetaArithmetic, uint) -> uint
-
- HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
- HAbsolute, /// (f16vec2 a) -> f16vec2
- HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
- HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
- HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
- HMergeF32, /// (f16vec2 src) -> float
- HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
- HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
- HPack2, /// (float a, float b) -> f16vec2
-
- LogicalAssign, /// (bool& dst, bool src) -> void
- LogicalAnd, /// (bool a, bool b) -> bool
- LogicalOr, /// (bool a, bool b) -> bool
- LogicalXor, /// (bool a, bool b) -> bool
- LogicalNegate, /// (bool a) -> bool
- LogicalPick2, /// (bool2 pair, uint index) -> bool
- LogicalAll2, /// (bool2 a) -> bool
- LogicalAny2, /// (bool2 a) -> bool
-
- LogicalFLessThan, /// (float a, float b) -> bool
- LogicalFEqual, /// (float a, float b) -> bool
- LogicalFLessEqual, /// (float a, float b) -> bool
- LogicalFGreaterThan, /// (float a, float b) -> bool
- LogicalFNotEqual, /// (float a, float b) -> bool
- LogicalFGreaterEqual, /// (float a, float b) -> bool
- LogicalFIsNan, /// (float a) -> bool
-
- LogicalILessThan, /// (int a, int b) -> bool
- LogicalIEqual, /// (int a, int b) -> bool
- LogicalILessEqual, /// (int a, int b) -> bool
- LogicalIGreaterThan, /// (int a, int b) -> bool
- LogicalINotEqual, /// (int a, int b) -> bool
- LogicalIGreaterEqual, /// (int a, int b) -> bool
-
- LogicalULessThan, /// (uint a, uint b) -> bool
- LogicalUEqual, /// (uint a, uint b) -> bool
- LogicalULessEqual, /// (uint a, uint b) -> bool
- LogicalUGreaterThan, /// (uint a, uint b) -> bool
- LogicalUNotEqual, /// (uint a, uint b) -> bool
- LogicalUGreaterEqual, /// (uint a, uint b) -> bool
-
- Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
-
- Texture, /// (MetaTexture, float[N] coords) -> float4
- TextureLod, /// (MetaTexture, float[N] coords) -> float4
- TextureGather, /// (MetaTexture, float[N] coords) -> float4
- TextureQueryDimensions, /// (MetaTexture, float a) -> float4
- TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
- TexelFetch, /// (MetaTexture, int[N], int) -> float4
-
- Branch, /// (uint branch_target) -> void
- PushFlowStack, /// (uint branch_target) -> void
- PopFlowStack, /// () -> void
- Exit, /// () -> void
- Discard, /// () -> void
-
- EmitVertex, /// () -> void
- EndPrimitive, /// () -> void
-
- YNegate, /// () -> float
- LocalInvocationIdX, /// () -> uint
- LocalInvocationIdY, /// () -> uint
- LocalInvocationIdZ, /// () -> uint
- WorkGroupIdX, /// () -> uint
- WorkGroupIdY, /// () -> uint
- WorkGroupIdZ, /// () -> uint
-
- Amount,
-};
-
-enum class InternalFlag {
- Zero = 0,
- Sign = 1,
- Carry = 2,
- Overflow = 3,
- Amount = 4,
-};
-
/// Describes the behaviour of the code path between a given entry point and a return point.
enum class ExitMethod {
Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
@@ -208,71 +34,6 @@ enum class ExitMethod {
AlwaysEnd, ///< All code paths reach an END instruction.
};
-class Sampler {
-public:
- // Use this constructor for bound (non-bindless) Samplers
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{false} {}
-
- // Use this constructor for bindless Samplers
- explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
-
- // Use this only for serialization/deserialization
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_bindless)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{is_bindless} {}
-
- std::size_t GetOffset() const {
- return offset;
- }
-
- std::size_t GetIndex() const {
- return index;
- }
-
- Tegra::Shader::TextureType GetType() const {
- return type;
- }
-
- bool IsArray() const {
- return is_array;
- }
-
- bool IsShadow() const {
- return is_shadow;
- }
-
- bool IsBindless() const {
- return is_bindless;
- }
-
- std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- bool operator<(const Sampler& rhs) const {
- return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
- std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
- rhs.is_bindless);
- }
-
-private:
- /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
- /// instruction.
- std::size_t offset{};
- std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
- Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
- bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
-};
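The bindless constructor above packs the constant buffer index into the upper 32 bits of the 64-bit offset field and the constant buffer offset into the lower 32 bits, and GetBindlessCBuf unpacks them again. A minimal standalone sketch of that packing scheme (PackBindless and UnpackBindless are illustrative names, not part of the Sampler class):

#include <cstdint>
#include <utility>

// Pack a constant buffer (index, offset) pair into a single 64-bit value,
// mirroring what the bindless Sampler constructor stores in its offset member.
constexpr std::uint64_t PackBindless(std::uint32_t cbuf_index, std::uint32_t cbuf_offset) {
    return (static_cast<std::uint64_t>(cbuf_index) << 32) | cbuf_offset;
}

// Recover the pair, mirroring Sampler::GetBindlessCBuf.
constexpr std::pair<std::uint32_t, std::uint32_t> UnpackBindless(std::uint64_t packed) {
    return {static_cast<std::uint32_t>(packed >> 32), static_cast<std::uint32_t>(packed)};
}

static_assert(UnpackBindless(PackBindless(3, 0x40)).first == 3);
static_assert(UnpackBindless(PackBindless(3, 0x40)).second == 0x40);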
-
class ConstBuffer {
public:
explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -305,268 +66,11 @@ private:
bool is_indirect{};
};
-struct GlobalMemoryBase {
- u32 cbuf_index{};
- u32 cbuf_offset{};
-
- bool operator<(const GlobalMemoryBase& rhs) const {
- return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
- }
-};
-
struct GlobalMemoryUsage {
bool is_read{};
bool is_written{};
};
-struct MetaArithmetic {
- bool precise{};
-};
-
-struct MetaTexture {
- const Sampler& sampler;
- Node array{};
- Node depth_compare{};
- std::vector<Node> aoffi;
- Node bias{};
- Node lod{};
- Node component{};
- u32 element{};
-};
-
-constexpr MetaArithmetic PRECISE = {true};
-constexpr MetaArithmetic NO_PRECISE = {false};
-
-using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
-
-/// Holds any kind of operation that can be done in the IR
-class OperationNode final {
-public:
- explicit OperationNode(OperationCode code) : code{code} {}
-
- explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
-
- template <typename... T>
- explicit OperationNode(OperationCode code, const T*... operands)
- : OperationNode(code, {}, operands...) {}
-
- template <typename... T>
- explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
- : code{code}, meta{std::move(meta)}, operands{operands_...} {}
-
- explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
- : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {}
-
- explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
- : code{code}, operands{std::move(operands)} {}
-
- OperationCode GetCode() const {
- return code;
- }
-
- const Meta& GetMeta() const {
- return meta;
- }
-
- std::size_t GetOperandsCount() const {
- return operands.size();
- }
-
- Node operator[](std::size_t operand_index) const {
- return operands.at(operand_index);
- }
-
-private:
- const OperationCode code;
- const Meta meta;
- std::vector<Node> operands;
-};
-
-/// Encloses a block of conditionally-executed code guarded by a boolean-returning condition node
-class ConditionalNode final {
-public:
- explicit ConditionalNode(Node condition, std::vector<Node>&& code)
- : condition{condition}, code{std::move(code)} {}
-
- Node GetCondition() const {
- return condition;
- }
-
- const std::vector<Node>& GetCode() const {
- return code;
- }
-
-private:
- const Node condition; ///< Condition to be satisfied
- std::vector<Node> code; ///< Code to execute
-};
-
-/// A general purpose register
-class GprNode final {
-public:
- explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}
-
- u32 GetIndex() const {
- return static_cast<u32>(index);
- }
-
-private:
- const Tegra::Shader::Register index;
-};
-
-/// A 32-bit immediate value
-class ImmediateNode final {
-public:
- explicit constexpr ImmediateNode(u32 value) : value{value} {}
-
- u32 GetValue() const {
- return value;
- }
-
-private:
- const u32 value;
-};
-
-/// One of Maxwell's internal flags
-class InternalFlagNode final {
-public:
- explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}
-
- InternalFlag GetFlag() const {
- return flag;
- }
-
-private:
- const InternalFlag flag;
-};
-
-/// A predicate register; it can be negated without additional nodes
-class PredicateNode final {
-public:
- explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
- : index{index}, negated{negated} {}
-
- Tegra::Shader::Pred GetIndex() const {
- return index;
- }
-
- bool IsNegated() const {
- return negated;
- }
-
-private:
- const Tegra::Shader::Pred index;
- const bool negated;
-};
-
-/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
-class AbufNode final {
-public:
- // Initialize for standard attributes (index is explicit).
- explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
- Node buffer = {})
- : buffer{buffer}, index{index}, element{element} {}
-
- // Initialize for physical attributes (index is a variable value).
- explicit constexpr AbufNode(Node physical_address, Node buffer = {})
- : physical_address{physical_address}, buffer{buffer} {}
-
- Tegra::Shader::Attribute::Index GetIndex() const {
- return index;
- }
-
- u32 GetElement() const {
- return element;
- }
-
- Node GetBuffer() const {
- return buffer;
- }
-
- bool IsPhysicalBuffer() const {
- return physical_address != nullptr;
- }
-
- Node GetPhysicalAddress() const {
- return physical_address;
- }
-
-private:
- Node physical_address{};
- Node buffer{};
- Tegra::Shader::Attribute::Index index{};
- u32 element{};
-};
-
-/// Constant buffer node, usually mapped to uniform buffers in GLSL
-class CbufNode final {
-public:
- explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {}
-
- u32 GetIndex() const {
- return index;
- }
-
- Node GetOffset() const {
- return offset;
- }
-
-private:
- const u32 index;
- const Node offset;
-};
-
-/// Local memory node
-class LmemNode final {
-public:
- explicit constexpr LmemNode(Node address) : address{address} {}
-
- Node GetAddress() const {
- return address;
- }
-
-private:
- const Node address;
-};
-
-/// Global memory node
-class GmemNode final {
-public:
- explicit constexpr GmemNode(Node real_address, Node base_address,
- const GlobalMemoryBase& descriptor)
- : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}
-
- Node GetRealAddress() const {
- return real_address;
- }
-
- Node GetBaseAddress() const {
- return base_address;
- }
-
- const GlobalMemoryBase& GetDescriptor() const {
- return descriptor;
- }
-
-private:
- const Node real_address;
- const Node base_address;
- const GlobalMemoryBase descriptor;
-};
-
-/// Commentary, can be dropped
-class CommentNode final {
-public:
- explicit CommentNode(std::string text) : text{std::move(text)} {}
-
- const std::string& GetText() const {
- return text;
- }
-
-private:
- std::string text;
-};
-
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
@@ -663,26 +167,6 @@ private:
u32 DecodeXmad(NodeBlock& bb, u32 pc);
u32 DecodeOther(NodeBlock& bb, u32 pc);
- /// Internalizes the node's data and returns a managed pointer to a clone of that node
- Node StoreNode(NodeData&& node_data);
-
- /// Creates a conditional node
- Node Conditional(Node condition, std::vector<Node>&& code);
- /// Creates a commentary
- Node Comment(std::string text);
- /// Creates a u32 immediate
- Node Immediate(u32 value);
- /// Creates a s32 immediate
- Node Immediate(s32 value) {
- return Immediate(static_cast<u32>(value));
- }
- /// Creates a f32 immediate
- Node Immediate(f32 value) {
- u32 integral;
- std::memcpy(&integral, &value, sizeof(u32));
- return Immediate(integral);
- }
-
/// Generates a node for a passed register.
Node GetRegister(Tegra::Shader::Register reg);
/// Generates a node representing a 19-bit immediate value
@@ -827,37 +311,6 @@ private:
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
- template <typename... T>
- Node Operation(OperationCode code, const T*... operands) {
- return StoreNode(OperationNode(code, operands...));
- }
-
- template <typename... T>
- Node Operation(OperationCode code, Meta&& meta, const T*... operands) {
- return StoreNode(OperationNode(code, std::move(meta), operands...));
- }
-
- Node Operation(OperationCode code, std::vector<Node>&& operands) {
- return StoreNode(OperationNode(code, std::move(operands)));
- }
-
- Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
- return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
- }
-
- template <typename... T>
- Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) {
- return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...));
- }
-
- template <typename... T>
- Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) {
- return StoreNode(
- OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...));
- }
-
- static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
-
const ProgramCode& program_code;
const u32 main_offset;
@@ -868,8 +321,6 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
- std::vector<std::unique_ptr<NodeData>> stored_nodes;
-
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
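The SignedOperation helpers removed above share one OperationCode listing between signed and unsigned variants and pick the right one at node-build time through SignedToUnsignedCode. A self-contained sketch of how such a mapping can be shaped, trimmed to a few of the I*/U* pairs from the enum (an illustration of the idea, not the project's actual implementation):

// Trimmed-down opcode list; the real enum also carries the float, half and
// logical operations shown in the diff above.
enum class OperationCode { IDiv, IMin, IMax, UDiv, UMin, UMax };

constexpr OperationCode SignedToUnsignedCode(OperationCode code, bool is_signed) {
    if (is_signed) {
        return code; // signed opcodes are used as-is
    }
    switch (code) {
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    default:
        return code; // already an unsigned opcode
    }
}

static_assert(SignedToUnsignedCode(OperationCode::IMin, false) == OperationCode::UMin);
static_assert(SignedToUnsignedCode(OperationCode::IMin, true) == OperationCode::IMin);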
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 19ede1eb9..fc957d980 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -16,12 +16,12 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
const Node node = code.at(cursor);
- if (const auto operation = std::get_if<OperationNode>(node)) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (operation->GetCode() == operation_code) {
return {node, cursor};
}
}
- if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
const auto& conditional_code = conditional->GetCode();
const auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
@@ -35,11 +35,11 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
} // namespace
Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
- if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
+ if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
// Cbuf found, but it has to be immediate
return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
}
- if (const auto gpr = std::get_if<GprNode>(tracked)) {
+ if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
return nullptr;
}
@@ -51,7 +51,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const
}
return TrackCbuf(source, code, new_cursor);
}
- if (const auto operation = std::get_if<OperationNode>(tracked)) {
+ if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
// Cbuf found in operand
@@ -60,7 +60,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const
}
return nullptr;
}
- if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
+ if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
const auto& conditional_code = conditional->GetCode();
return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
}
@@ -75,7 +75,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
if (!found) {
return {};
}
- if (const auto immediate = std::get_if<ImmediateNode>(found)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
return immediate->GetValue();
}
return {};
@@ -88,11 +88,11 @@ std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeB
if (!found_node) {
return {};
}
- const auto operation = std::get_if<OperationNode>(found_node);
+ const auto operation = std::get_if<OperationNode>(&*found_node);
ASSERT(operation);
const auto& target = (*operation)[0];
- if (const auto gpr_target = std::get_if<GprNode>(target)) {
+ if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
if (gpr_target->GetIndex() == tracked->GetIndex()) {
return {(*operation)[1], new_cursor};
}
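The track.cpp hunks above change every std::get_if<T>(node) into std::get_if<T>(&*node): std::get_if only accepts a pointer to the variant itself, which is the kind of adjustment needed when a node handle becomes a smart pointer (or any pointer-like wrapper) over the variant rather than a plain pointer. A minimal self-contained sketch of that access pattern, with NodeData, Node and MakeNode as assumed, illustrative names:

#include <memory>
#include <utility>
#include <variant>

struct ImmediateNode {
    unsigned value;
};
struct GprNode {
    unsigned index;
};

// Assumed shape: an IR node is a variant owned through a shared pointer.
using NodeData = std::variant<ImmediateNode, GprNode>;
using Node = std::shared_ptr<NodeData>;

template <typename T>
Node MakeNode(T data) {
    return std::make_shared<NodeData>(std::move(data));
}

int main() {
    const Node node = MakeNode(ImmediateNode{42});
    // std::get_if wants a NodeData*, so dereference the shared_ptr and take
    // the address of the pointee: &*node (node.get() would work equally well).
    if (const auto* imm = std::get_if<ImmediateNode>(&*node)) {
        return static_cast<int>(imm->value);
    }
    return 1;
}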
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index bea0d5bc2..219bfd559 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -251,7 +251,7 @@ enum class WrapMode : u32 {
Mirror = 1,
ClampToEdge = 2,
Border = 3,
- ClampOGL = 4,
+ Clamp = 4,
MirrorOnceClampToEdge = 5,
MirrorOnceBorder = 6,
MirrorOnceClampOGL = 7,