27 files changed, 1320 insertions, 675 deletions
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index cca13bcde..8e5a22ab3 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -199,55 +199,48 @@ public:
     }
 
     std::optional<u32> GetGotoLabel() const {
-        auto inner = std::get_if<ASTGoto>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTGoto>(&data)) {
             return {inner->label};
         }
-        return {};
+        return std::nullopt;
     }
 
     Expr GetGotoCondition() const {
-        auto inner = std::get_if<ASTGoto>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTGoto>(&data)) {
             return inner->condition;
         }
         return nullptr;
     }
 
     void MarkLabelUnused() {
-        auto inner = std::get_if<ASTLabel>(&data);
-        if (inner) {
+        if (auto* inner = std::get_if<ASTLabel>(&data)) {
             inner->unused = true;
         }
     }
 
     bool IsLabelUnused() const {
-        auto inner = std::get_if<ASTLabel>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTLabel>(&data)) {
             return inner->unused;
         }
         return true;
     }
 
     std::optional<u32> GetLabelIndex() const {
-        auto inner = std::get_if<ASTLabel>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTLabel>(&data)) {
             return {inner->index};
         }
-        return {};
+        return std::nullopt;
     }
 
     Expr GetIfCondition() const {
-        auto inner = std::get_if<ASTIfThen>(&data);
-        if (inner) {
+        if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
             return inner->condition;
         }
         return nullptr;
     }
 
     void SetGotoCondition(Expr new_condition) {
-        auto inner = std::get_if<ASTGoto>(&data);
-        if (inner) {
+        if (auto* inner = std::get_if<ASTGoto>(&data)) {
             inner->condition = std::move(new_condition);
         }
     }
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
new file mode 100644
index 000000000..6920afdf2
--- /dev/null
+++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,216 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/shader/async_shaders.h"
+
+namespace VideoCommon::Shader {
+
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
+
+AsyncShaders::~AsyncShaders() {
+    KillWorkers();
+}
+
+void AsyncShaders::AllocateWorkers() {
+    // Use at least one thread
+    u32 num_workers = 1;
+
+    // Deduce how many more threads we can use
+    const u32 thread_count = std::thread::hardware_concurrency();
+    if (thread_count >= 8) {
+        // Increase async workers by 1 for every 2 threads >= 8
+        num_workers += 1 + (thread_count - 8) / 2;
+    }
+
+    // If we already have workers queued, ignore
+    if (num_workers == worker_threads.size()) {
+        return;
+    }
+
+    // If workers already exist, clear them
+    if (!worker_threads.empty()) {
+        FreeWorkers();
+    }
+
+    // Create workers
+    for (std::size_t i = 0; i < num_workers; i++) {
+        context_list.push_back(emu_window.CreateSharedContext());
+        worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
+                                    context_list[i].get());
+    }
+}
+
+void AsyncShaders::FreeWorkers() {
+    // Mark all threads to quit
+    is_thread_exiting.store(true);
+    cv.notify_all();
+    for (auto& thread : worker_threads) {
+        thread.join();
+    }
+    // Clear our shared contexts
+    context_list.clear();
+
+    // Clear our worker threads
+    worker_threads.clear();
+}
+
+void AsyncShaders::KillWorkers() {
+    is_thread_exiting.store(true);
+    for (auto& thread : worker_threads) {
+        thread.detach();
+    }
+    // Clear our shared contexts
+    context_list.clear();
+
+    // Clear our worker threads
+    worker_threads.clear();
+}
+
+bool AsyncShaders::HasWorkQueued() const {
+    return !pending_queue.empty();
+}
+
+bool AsyncShaders::HasCompletedWork() const {
+    std::shared_lock lock{completed_mutex};
+    return !finished_work.empty();
+}
+
+bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
+    const auto& regs = gpu.Maxwell3D().regs;
+
+    // If something is using depth, we can assume that games are not rendering anything which will
+    // be used one time.
+    if (regs.zeta_enable) {
+        return true;
+    }
+
+    // If games are using a small index count, we can assume these are full screen quads. Usually
+    // these shaders are only used once for building textures so we can assume they can't be built
+    // async
+    if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
+        return false;
+    }
+
+    return true;
+}
+
+std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
+    std::vector<Result> results;
+    {
+        std::unique_lock lock{completed_mutex};
+        results = std::move(finished_work);
+        finished_work.clear();
+    }
+    return results;
+}
+
+void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
+                                     Tegra::Engines::ShaderType shader_type, u64 uid,
+                                     std::vector<u64> code, std::vector<u64> code_b,
+                                     u32 main_offset, CompilerSettings compiler_settings,
+                                     const Registry& registry, VAddr cpu_addr) {
+    std::unique_lock lock(queue_mutex);
+    pending_queue.push({
+        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+        .device = &device,
+        .shader_type = shader_type,
+        .uid = uid,
+        .code = std::move(code),
+        .code_b = std::move(code_b),
+        .main_offset = main_offset,
+        .compiler_settings = compiler_settings,
+        .registry = registry,
+        .cpu_address = cpu_addr,
+    });
+    cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     Vulkan::VKDescriptorPool& descriptor_pool,
+                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                                     Vulkan::VKRenderPassCache& renderpass_cache,
+                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
+                                     Vulkan::SPIRVProgram program,
+                                     Vulkan::GraphicsPipelineCacheKey key) {
+    std::unique_lock lock(queue_mutex);
+    pending_queue.push({
+        .backend = Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .vk_device = &device,
+        .scheduler = &scheduler,
+        .descriptor_pool = &descriptor_pool,
+        .update_descriptor_queue = &update_descriptor_queue,
+        .renderpass_cache = &renderpass_cache,
+        .bindings = std::move(bindings),
+        .program = std::move(program),
+        .key = key,
+    });
+    cv.notify_one();
+}
+
+void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
+    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
+        std::unique_lock lock{queue_mutex};
+        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
+        if (is_thread_exiting) {
+            return;
+        }
+
+        // Partial lock to allow all threads to read at the same time
+        if (!HasWorkQueued()) {
+            continue;
+        }
+        // Another thread beat us, just unlock and wait for the next load
+        if (pending_queue.empty()) {
+            continue;
+        }
+
+        // Pull work from queue
+        WorkerParams work = std::move(pending_queue.front());
+        pending_queue.pop();
+        lock.unlock();
+
+        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
+            const auto scope = context->Acquire();
+            auto program =
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
+            Result result{};
+            result.backend = work.backend;
+            result.cpu_address = work.cpu_address;
+            result.uid = work.uid;
+            result.code = std::move(work.code);
+            result.code_b = std::move(work.code_b);
+            result.shader_type = work.shader_type;
+
+            if (work.backend == Backend::OpenGL) {
+                result.program.opengl = std::move(program->source_program);
+            } else if (work.backend == Backend::GLASM) {
+                result.program.glasm = std::move(program->assembly_program);
+            }
+
+            {
+                std::unique_lock complete_lock(completed_mutex);
+                finished_work.push_back(std::move(result));
+            }
+        } else if (work.backend == Backend::Vulkan) {
+            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                *work.vk_device, *work.scheduler, *work.descriptor_pool,
+                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+                work.program);
+
+            work.pp_cache->EmplacePipeline(std::move(pipeline));
+        }
+    }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
new file mode 100644
index 000000000..7a99e1dc5
--- /dev/null
+++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,147 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <condition_variable>
+#include <memory>
+#include <shared_mutex>
+#include <thread>
+
+// This header includes both Vulkan and OpenGL headers, this has to be fixed
+// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
+// Forcefully include glad early and undefine macros
+#include <glad/glad.h>
+#ifdef CreateEvent
+#undef CreateEvent
+#endif
+#ifdef CreateSemaphore
+#undef CreateSemaphore
+#endif
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+class GraphicsContext;
+} // namespace Core::Frontend
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Vulkan {
+class VKPipelineCache;
+}
+
+namespace VideoCommon::Shader {
+
+class AsyncShaders {
+public:
+    enum class Backend {
+        OpenGL,
+        GLASM,
+        Vulkan,
+    };
+
+    struct ResultPrograms {
+        OpenGL::OGLProgram opengl;
+        OpenGL::OGLAssemblyProgram glasm;
+    };
+
+    struct Result {
+        u64 uid;
+        VAddr cpu_address;
+        Backend backend;
+        ResultPrograms program;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        Tegra::Engines::ShaderType shader_type;
+    };
+
+    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+    ~AsyncShaders();
+
+    /// Start up shader worker threads
+    void AllocateWorkers();
+
+    /// Clear the shader queue and kill all worker threads
+    void FreeWorkers();
+
+    // Force end all threads
+    void KillWorkers();
+
+    /// Check to see if any shaders have actually been compiled
+    [[nodiscard]] bool HasCompletedWork() const;
+
+    /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build
+    /// every shader async as some shaders are only built and executed once. We try to "guess" which
+    /// shader would be used only once
+    [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
+
+    /// Pulls completed compiled shaders
+    [[nodiscard]] std::vector<Result> GetCompletedWork();
+
+    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
+                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
+                           CompilerSettings compiler_settings, const Registry& registry,
+                           VAddr cpu_addr);
+
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+                           Vulkan::VKScheduler& scheduler,
+                           Vulkan::VKDescriptorPool& descriptor_pool,
+                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                           Vulkan::VKRenderPassCache& renderpass_cache,
+                           std::vector<VkDescriptorSetLayoutBinding> bindings,
+                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+
+private:
+    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
+
+    /// Check our worker queue to see if we have any work queued already
+    [[nodiscard]] bool HasWorkQueued() const;
+
+    struct WorkerParams {
+        Backend backend;
+        // For OGL
+        const OpenGL::Device* device;
+        Tegra::Engines::ShaderType shader_type;
+        u64 uid;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        u32 main_offset;
+        CompilerSettings compiler_settings;
+        std::optional<Registry> registry;
+        VAddr cpu_address;
+
+        // For Vulkan
+        Vulkan::VKPipelineCache* pp_cache;
+        const Vulkan::VKDevice* vk_device;
+        Vulkan::VKScheduler* scheduler;
+        Vulkan::VKDescriptorPool* descriptor_pool;
+        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+        Vulkan::VKRenderPassCache* renderpass_cache;
+        std::vector<VkDescriptorSetLayoutBinding> bindings;
+        Vulkan::SPIRVProgram program;
+        Vulkan::GraphicsPipelineCacheKey key;
+    };
+
+    std::condition_variable cv;
+    mutable std::mutex queue_mutex;
+    mutable std::shared_mutex completed_mutex;
+    std::atomic<bool> is_thread_exiting{};
+    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
+    std::vector<std::thread> worker_threads;
+    std::queue<WorkerParams> pending_queue;
+    std::vector<Result> finished_work;
+    Core::Frontend::EmuWindow& emu_window;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 2e2711350..4c8971615 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
 #include "common/common_types.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/control_flow.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -115,17 +116,6 @@ Pred GetPredicate(u32 index, bool negated) {
     return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
 }
 
-/**
- * Returns whether the instruction at the specified offset is a 'sched' instruction.
- * Sched instructions always appear before a sequence of 3 instructions.
- */
-constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
-    constexpr u32 SchedPeriod = 4;
-    u32 absolute_offset = offset - main_offset;
-
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
 enum class ParseResult : u32 {
     ControlCaught,
     BlockEnd,
@@ -197,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state,
 
 std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
                                     u64 ldc_tracked_register) {
-    return TrackInstruction<u64>(state, pos,
-                                 [ldc_tracked_register](auto instr, const auto& opcode) {
-                                     return opcode.GetId() == OpCode::Id::SHL_IMM &&
-                                            instr.gpr0.Value() == ldc_tracked_register;
-                                 },
-                                 [](auto instr, const auto&) { return instr.gpr8.Value(); });
+    return TrackInstruction<u64>(
+        state, pos,
+        [ldc_tracked_register](auto instr, const auto& opcode) {
+            return opcode.GetId() == OpCode::Id::SHL_IMM &&
+                   instr.gpr0.Value() == ldc_tracked_register;
+        },
+        [](auto instr, const auto&) { return instr.gpr8.Value(); });
 }
 
 std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
                                    u64 shl_tracked_register) {
-    return TrackInstruction<u32>(state, pos,
-                                 [shl_tracked_register](auto instr, const auto& opcode) {
-                                     return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
-                                            instr.gpr0.Value() == shl_tracked_register;
-                                 },
-                                 [](auto instr, const auto&) {
-                                     return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
-                                 });
+    return TrackInstruction<u32>(
+        state, pos,
+        [shl_tracked_register](auto instr, const auto& opcode) {
+            return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+                   instr.gpr0.Value() == shl_tracked_register;
+        },
+        [](auto instr, const auto&) {
+            return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+        });
 }
 
 std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
@@ -484,17 +476,17 @@ bool TryInspectAddress(CFGRebuildState& state) {
     }
     case BlockCollision::Inside: {
         // This case is the tricky one:
-        // We need to Split the block in 2 sepparate blocks
+        // We need to split the block into 2 separate blocks
         const u32 end = state.block_info[block_index].end;
         BlockInfo& new_block = CreateBlockInfo(state, address, end);
         BlockInfo& current_block = state.block_info[block_index];
         current_block.end = address - 1;
-        new_block.branch = current_block.branch;
+        new_block.branch = std::move(current_block.branch);
         BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
         const auto branch = std::get_if<SingleBranch>(forward_branch.get());
         branch->address = address;
         branch->ignore = true;
-        current_block.branch = forward_branch;
+        current_block.branch = std::move(forward_branch);
         return true;
     }
     default:
@@ -555,13 +547,13 @@ bool TryQuery(CFGRebuildState& state) {
     gather_labels(q2.ssy_stack, state.ssy_labels, block);
     gather_labels(q2.pbk_stack, state.pbk_labels, block);
     if (std::holds_alternative<SingleBranch>(*block.branch)) {
-        const auto branch = std::get_if<SingleBranch>(block.branch.get());
+        auto* branch = std::get_if<SingleBranch>(block.branch.get());
         if (!branch->condition.IsUnconditional()) {
             q2.address = block.end + 1;
             state.queries.push_back(q2);
         }
 
-        Query conditional_query{q2};
+        auto& conditional_query = state.queries.emplace_back(q2);
         if (branch->is_sync) {
             if (branch->address == unassigned_branch) {
                 branch->address = conditional_query.ssy_stack.top();
@@ -575,23 +567,21 @@ bool TryQuery(CFGRebuildState& state) {
             conditional_query.pbk_stack.pop();
         }
         conditional_query.address = branch->address;
-        state.queries.push_back(std::move(conditional_query));
         return true;
     }
-    const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+
+    const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
     for (const auto& branch_case : multi_branch->branches) {
-        Query conditional_query{q2};
+        auto& conditional_query = state.queries.emplace_back(q2);
         conditional_query.address = branch_case.address;
-        state.queries.push_back(std::move(conditional_query));
     }
+
     return true;
 }
 
-} // Anonymous namespace
-
 void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
-    const auto get_expr = ([&](const Condition& cond) -> Expr {
-        Expr result{};
+    const auto get_expr = [](const Condition& cond) -> Expr {
+        Expr result;
         if (cond.cc != ConditionCode::T) {
             result = MakeExpr<ExprCondCode>(cond.cc);
         }
@@ -604,10 +594,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
             }
             Expr extra = MakeExpr<ExprPredicate>(pred);
             if (negate) {
-                extra = MakeExpr<ExprNot>(extra);
+                extra = MakeExpr<ExprNot>(std::move(extra));
             }
             if (result) {
-                return MakeExpr<ExprAnd>(extra, result);
+                return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
             }
             return extra;
         }
@@ -615,9 +605,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
             return result;
         }
         return MakeExpr<ExprBoolean>(true);
-    });
+    };
+
     if (std::holds_alternative<SingleBranch>(*branch_info)) {
-        const auto branch = std::get_if<SingleBranch>(branch_info.get());
+        const auto* branch = std::get_if<SingleBranch>(branch_info.get());
         if (branch->address < 0) {
             if (branch->kill) {
                 mm.InsertReturn(get_expr(branch->condition), true);
@@ -629,7 +620,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
         mm.InsertGoto(get_expr(branch->condition), branch->address);
         return;
     }
-    const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
+    const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
     for (const auto& branch_case : multi_branch->branches) {
         mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
                       branch_case.address);
@@ -655,6 +646,8 @@ void DecompileShader(CFGRebuildState& state) {
     state.manager->Decompile();
 }
 
+} // Anonymous namespace
+
 std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
                                                 const CompilerSettings& settings,
                                                 Registry& registry) {
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 87ac9ac6c..eeac328a6 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -13,6 +13,7 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/control_flow.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -23,17 +24,6 @@ using Tegra::Shader::OpCode;
 
 namespace {
 
-/**
- * Returns whether the instruction at the specified offset is a 'sched' instruction.
- * Sched instructions always appear before a sequence of 3 instructions.
- */
-constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
-    constexpr u32 SchedPeriod = 4;
-    u32 absolute_offset = offset - main_offset;
-
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
 void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
                               const std::list<Sampler>& used_samplers) {
     if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
@@ -42,11 +32,11 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
     u32 count{};
     std::vector<u32> bound_offsets;
     for (const auto& sampler : used_samplers) {
-        if (sampler.IsBindless()) {
+        if (sampler.is_bindless) {
             continue;
         }
         ++count;
-        bound_offsets.emplace_back(sampler.GetOffset());
+        bound_offsets.emplace_back(sampler.offset);
     }
     if (count > 1) {
         gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
@@ -56,14 +46,14 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
 std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
                                         VideoCore::GuestDriverProfile& gpu_driver,
                                         const std::list<Sampler>& used_samplers) {
-    const u32 base_offset = sampler_to_deduce.GetOffset();
+    const u32 base_offset = sampler_to_deduce.offset;
     u32 max_offset{std::numeric_limits<u32>::max()};
     for (const auto& sampler : used_samplers) {
-        if (sampler.IsBindless()) {
+        if (sampler.is_bindless) {
             continue;
         }
-        if (sampler.GetOffset() > base_offset) {
-            max_offset = std::min(sampler.GetOffset(), max_offset);
+        if (sampler.offset > base_offset) {
+            max_offset = std::min(sampler.offset, max_offset);
         }
     }
     if (max_offset == std::numeric_limits<u32>::max()) {
@@ -265,7 +255,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
         Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
         Node op_b = Immediate(branch_case.cmp_value);
         Node condition =
-            GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+            GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
         auto result = Conditional(condition, {n});
         bb.push_back(result);
         global_code.push_back(result);
@@ -363,14 +353,14 @@ void ShaderIR::PostDecode() {
         return;
     }
     for (auto& sampler : used_samplers) {
-        if (!sampler.IsIndexed()) {
+        if (!sampler.is_indexed) {
             continue;
         }
         if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
-            sampler.SetSize(*size);
+            sampler.size = *size;
         } else {
             LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
-            sampler.SetSize(1);
+            sampler.size = 1;
         }
     }
 }
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 4db329fa5..afef5948d 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::FCMP_RR:
-    case OpCode::Id::FCMP_RC: {
+    case OpCode::Id::FCMP_RC:
+    case OpCode::Id::FCMP_IMMR: {
         UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
         Node op_c = GetRegister(instr.gpr39);
         Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index ee7d9a29d..88103fede 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -19,22 +19,49 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
-        opcode->get().GetId() == OpCode::Id::HADD2_R) {
+    bool negate_a = false;
+    bool negate_b = false;
+    bool absolute_a = false;
+    bool absolute_b = false;
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HADD2_R:
         if (instr.alu_half.ftz == 0) {
             LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
         }
+        negate_a = ((instr.value >> 43) & 1) != 0;
+        negate_b = ((instr.value >> 31) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 30) & 1) != 0;
+        break;
+    case OpCode::Id::HADD2_C:
+        if (instr.alu_half.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+        }
+        negate_a = ((instr.value >> 43) & 1) != 0;
+        negate_b = ((instr.value >> 56) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 54) & 1) != 0;
+        break;
+    case OpCode::Id::HMUL2_R:
+        negate_a = ((instr.value >> 43) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 30) & 1) != 0;
+        break;
+    case OpCode::Id::HMUL2_C:
+        negate_b = ((instr.value >> 31) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 54) & 1) != 0;
+        break;
+    default:
+        UNREACHABLE();
+        break;
     }
 
-    const bool negate_a =
-        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
-    const bool negate_b =
-        opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
-
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
-    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
+    op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
 
-    auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
+    auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
         switch (opcode->get().GetId()) {
         case OpCode::Id::HADD2_C:
         case OpCode::Id::HMUL2_C:
@@ -48,17 +75,16 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
         }
     }();
     op_b = UnpackHalfFloat(op_b, type_b);
-    // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
-    Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+    op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
 
-    Node value = [&]() {
+    Node value = [this, opcode, op_a, op_b = op_b] {
         switch (opcode->get().GetId()) {
         case OpCode::Id::HADD2_C:
         case OpCode::Id::HADD2_R:
-            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
+            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
         case OpCode::Id::HMUL2_C:
         case OpCode::Id::HMUL2_R:
-            return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
+            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
         default:
             UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
             return Immediate(0);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 0f4c3103a..73155966f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -35,15 +35,38 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
     case OpCode::Id::IADD_C:
     case OpCode::Id::IADD_R:
     case OpCode::Id::IADD_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
+        UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
 
         op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
         op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
 
-        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+        Node value = Operation(OperationCode::UAdd, op_a, op_b);
 
-        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        if (instr.iadd.x) {
+            Node carry = GetInternalFlag(InternalFlag::Carry);
+            Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
+            value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
+        }
+
+        if (instr.generates_cc) {
+            const Node i0 = Immediate(0);
+
+            Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
+            Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
+            Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
+
+            Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
+            Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
+            Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
+            Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
+
+            SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
+            SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
+            SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
+            SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
+        }
+        SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::IADD3_C:
@@ -75,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
         op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
         op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
 
-        const Node value = [&]() {
-            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+        const Node value = [&] {
+            Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
             if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
                 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
             }
-            const Node shifted = [&]() {
+            const Node shifted = [&] {
                 switch (instr.iadd3.mode) {
                 case Tegra::Shader::IAdd3Mode::RightShift:
                     // TODO(tech4me): According to
@@ -249,8 +272,8 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
             }
             case OpCode::Id::LEA_IMM: {
                 const bool neg = instr.lea.imm.neg != 0;
-                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
-                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                         Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
             }
             case OpCode::Id::LEA_RZ: {
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 73880db0e..2a30aab2b 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
     case OpCode::Id::IADD32I: {
         UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
 
-        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
+        op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
 
-        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+        Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
 
-        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
+        SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::LOP32I: {
-        if (instr.alu.lop32i.invert_a)
-            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+        if (instr.alu.lop32i.invert_a) {
+            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
+        }
 
-        if (instr.alu.lop32i.invert_b)
-            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        if (instr.alu.lop32i.invert_b) {
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+        }
 
-        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
-                            PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
+                            std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
+                            instr.op_32.generates_cc != 0);
         break;
     }
     default:
@@ -58,14 +61,14 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
 void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
                                    Node op_b, PredicateResultMode predicate_mode, Pred predicate,
                                    bool sets_cc) {
-    const Node result = [&]() {
+    Node result = [&] {
         switch (logic_op) {
         case LogicOperation::And:
-            return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
+            return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
         case LogicOperation::Or:
-            return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
+            return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
         case LogicOperation::Xor:
-            return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
+            return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
         case LogicOperation::PassB:
             return op_b;
         default:
@@ -84,8 +87,8 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
         return;
     case PredicateResultMode::NotZero: {
         // Set the predicate to true if the result is not zero.
-        const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
-        SetPredicate(bb, static_cast<u64>(predicate), compare);
+        Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
+        SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
         break;
     }
     default:
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
 
 u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    if (instr.hset2.ftz == 0) {
-        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+    PredCondition cond;
+    bool bf;
+    bool ftz;
+    bool neg_a;
+    bool abs_a;
+    bool neg_b;
+    bool abs_b;
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HSET2_C:
+    case OpCode::Id::HSET2_IMM:
+        cond = instr.hsetp2.cbuf_and_imm.cond;
+        bf = instr.Bit(53);
+        ftz = instr.Bit(54);
+        neg_a = instr.Bit(43);
+        abs_a = instr.Bit(44);
+        neg_b = instr.Bit(56);
+        abs_b = instr.Bit(54);
+        break;
+    case OpCode::Id::HSET2_R:
+        cond = instr.hsetp2.reg.cond;
+        bf = instr.Bit(49);
+        ftz = instr.Bit(50);
+        neg_a = instr.Bit(43);
+        abs_a = instr.Bit(44);
+        neg_b = instr.Bit(31);
+        abs_b = instr.Bit(30);
+        break;
+    default:
+        UNREACHABLE();
     }
 
-    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
-    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
-
-    Node op_b = [&]() {
+    Node op_b = [this, instr, opcode] {
         switch (opcode->get().GetId()) {
+        case OpCode::Id::HSET2_C:
+            // Inform as unimplemented as this is not tested.
+            UNIMPLEMENTED_MSG("HSET2_C is not implemented");
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
         case OpCode::Id::HSET2_R:
             return GetRegister(instr.gpr20);
+        case OpCode::Id::HSET2_IMM:
+            return UnpackHalfImmediate(instr, true);
         default:
             UNREACHABLE();
-            return Immediate(0);
+            return Node{};
         }
     }();
-    op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
-    op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
 
-    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+    if (!ftz) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+    }
+
+    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+    op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HSET2_R:
+        op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
+        [[fallthrough]];
+    case OpCode::Id::HSET2_C:
+        op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
+        break;
+    default:
+        break;
+    }
 
-    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
+    Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+    Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
 
     const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
 
     // HSET2 operates on each half float in the pack.
     std::array<Node, 2> values;
     for (u32 i = 0; i < 2; ++i) {
-        const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
-        const Node true_value = Immediate(raw_value << (i * 16));
-        const Node false_value = Immediate(0);
-
-        const Node comparison =
-            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
-        const Node predicate = Operation(combiner, comparison, second_pred);
+        const u32 raw_value = bf ? 0x3c00 : 0xffff;
+        Node true_value = Immediate(raw_value << (i * 16));
+        Node false_value = Immediate(0);
 
+        Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+        Node predicate = Operation(combiner, comparison, second_pred);
         values[i] =
-            Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+            Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
     }
 
-    const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
-    SetRegister(bb, instr.gpr0, value);
+    Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
+    SetRegister(bb, instr.gpr0, move(value));
 
     return pc;
 }
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 08ebca38b..1ed4212ee 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
                                std::size_t component) {
     const TextureFormat format{descriptor.format};
     switch (format) {
-    case TextureFormat::R16_G16_B16_A16:
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R32_G32_B32:
-    case TextureFormat::R32_G32:
-    case TextureFormat::R16_G16:
+    case TextureFormat::R16G16B16A16:
+    case TextureFormat::R32G32B32A32:
+    case TextureFormat::R32G32B32:
+    case TextureFormat::R32G32:
+    case TextureFormat::R16G16:
     case TextureFormat::R32:
     case TextureFormat::R16:
     case TextureFormat::R8:
@@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
         break;
     case TextureFormat::B5G6R5:
     case TextureFormat::B6G5R5:
+    case TextureFormat::B10G11R11:
         if (component == 0) {
             return descriptor.b_type;
         }
@@ -107,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
             return descriptor.r_type;
         }
         break;
-    case TextureFormat::G8R24:
-    case TextureFormat::G24R8:
-    case TextureFormat::G8R8:
+    case TextureFormat::R24G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R8G8:
     case TextureFormat::G4R4:
         if (component == 0) {
             return descriptor.g_type;
@@ -118,6 +119,8 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
             return descriptor.r_type;
         }
         break;
+    default:
+        break;
     }
     UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
     return ComponentType::FLOAT;
@@ -136,15 +139,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
 
 u32 GetComponentSize(TextureFormat format, std::size_t component) {
     switch (format) {
-    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32G32B32A32:
         return 32;
-    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R16G16B16A16:
         return 16;
-    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32G32B32:
         return component <= 2 ? 32 : 0;
-    case TextureFormat::R32_G32:
+    case TextureFormat::R32G32:
         return component <= 1 ? 32 : 0;
-    case TextureFormat::R16_G16:
+    case TextureFormat::R16G16:
         return component <= 1 ? 16 : 0;
     case TextureFormat::R32:
         return component == 0 ? 32 : 0;
@@ -191,7 +194,15 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 6;
         }
         return 0;
-    case TextureFormat::G8R24:
+    case TextureFormat::B10G11R11:
+        if (component == 1 || component == 2) {
+            return 11;
+        }
+        if (component == 0) {
+            return 10;
+        }
+        return 0;
+    case TextureFormat::R24G8:
         if (component == 0) {
             return 8;
         }
@@ -199,7 +210,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 24;
         }
         return 0;
-    case TextureFormat::G24R8:
+    case TextureFormat::R8G24:
         if (component == 0) {
             return 24;
         }
@@ -207,7 +218,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 8;
         }
         return 0;
-    case TextureFormat::G8R8:
+    case TextureFormat::R8G8:
         return (component == 0 || component == 1) ? 8 : 0;
     case TextureFormat::G4R4:
         return (component == 0 || component == 1) ? 4 : 0;
@@ -223,24 +234,25 @@ std::size_t GetImageComponentMask(TextureFormat format) {
     constexpr u8 B = 0b0100;
     constexpr u8 A = 0b1000;
     switch (format) {
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32G32B32A32:
+    case TextureFormat::R16G16B16A16:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A4B4G4R4:
     case TextureFormat::A5B5G5R1:
     case TextureFormat::A1B5G5R5:
         return std::size_t{R | G | B | A};
-    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32G32B32:
     case TextureFormat::R32_B24G8:
     case TextureFormat::B5G6R5:
     case TextureFormat::B6G5R5:
+    case TextureFormat::B10G11R11:
         return std::size_t{R | G | B};
-    case TextureFormat::R32_G32:
-    case TextureFormat::R16_G16:
-    case TextureFormat::G8R24:
-    case TextureFormat::G24R8:
-    case TextureFormat::G8R8:
+    case TextureFormat::R32G32:
+    case TextureFormat::R16G16:
+    case TextureFormat::R24G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R8G8:
     case TextureFormat::G4R4:
         return std::size_t{R | G};
     case TextureFormat::R32:
@@ -299,7 +311,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
             return {std::move(original_value), true};
         }
     default:
-        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
         return {std::move(original_value), true};
     }
 }
@@ -352,8 +364,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                         registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
                 } else {
                     const Node image_register = GetRegister(instr.gpr39);
-                    const auto [base_image, buffer, offset] = TrackCbuf(
-                        image_register, global_code, static_cast<s64>(global_code.size()));
+                    const auto result = TrackCbuf(image_register, global_code,
+                                                  static_cast<s64>(global_code.size()));
+                    const auto buffer = std::get<1>(result);
+                    const auto offset = std::get<2>(result);
                     descriptor = registry.ObtainBindlessSampler(buffer, offset);
                 }
                 if (!descriptor) {
@@ -453,11 +467,14 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                     return OperationCode::AtomicImageXor;
                 case Tegra::Shader::ImageAtomicOperation::Exch:
                     return OperationCode::AtomicImageExchange;
+                default:
+                    break;
                 }
+                break;
             default:
                 break;
             }
-            UNIMPLEMENTED_MSG("Unimplemented operation={} type={}",
+            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
                               static_cast<u64>(instr.suatom_d.operation.Value()),
                               static_cast<u64>(instr.suatom_d.operation_type.Value()));
             return OperationCode::AtomicImageAdd;
@@ -483,11 +500,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
     const auto offset = static_cast<u32>(image.index.Value());
 
-    const auto it =
-        std::find_if(std::begin(used_images), std::end(used_images),
-                     [offset](const Image& entry) { return entry.GetOffset() == offset; });
+    const auto it = std::find_if(std::begin(used_images), std::end(used_images),
+                                 [offset](const Image& entry) { return entry.offset == offset; });
     if (it != std::end(used_images)) {
-        ASSERT(!it->IsBindless() && it->GetType() == it->GetType());
+        ASSERT(!it->is_bindless && it->type == type);
         return *it;
     }
 
@@ -497,16 +513,18 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t
 
 Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
     const Node image_register = GetRegister(reg);
-    const auto [base_image, buffer, offset] =
+    const auto result =
         TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
 
-    const auto it =
-        std::find_if(std::begin(used_images), std::end(used_images),
-                     [buffer = buffer, offset = offset](const Image& entry) {
-                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
-                     });
+    const auto buffer = std::get<1>(result);
+    const auto offset = std::get<2>(result);
+
+    const auto it = std::find_if(std::begin(used_images), std::end(used_images),
+                                 [buffer, offset](const Image& entry) {
+                                     return entry.buffer == buffer && entry.offset == offset;
+                                 });
     if (it != std::end(used_images)) {
-        ASSERT(it->IsBindless() && it->GetType() == it->GetType());
+        ASSERT(it->is_bindless && it->type == type);
         return *it;
     }
 
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 8112ead3e..e2bba88dd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -386,8 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::RED: {
-        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
-        UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
+        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+                             static_cast<int>(instr.red.type.Value()));
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
         if (!real_address || !base_address) {
@@ -396,7 +396,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }
         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
         Node value = GetRegister(instr.gpr0);
-        bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
+        bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
         break;
     }
     case OpCode::Id::ATOM: {
@@ -472,14 +472,14 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
 
     const auto [base_address, index, offset] =
         TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
-                          { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
-                          "Global memory tracking failed");
+    ASSERT_OR_EXECUTE_MSG(
+        base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+        "Global memory tracking failed");
 
     bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
 
     const GlobalMemoryBase descriptor{index, offset};
-    const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
+    const auto& entry = used_global_memory.try_emplace(descriptor).first;
     auto& usage = entry->second;
     usage.is_written |= is_write;
     usage.is_read |= is_read;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..29a7cfbfe 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -75,15 +75,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::LaneId:
-                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
-                return Immediate(0U);
+                return Operation(OperationCode::ThreadId);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
             case SystemVariable::InvocationInfo:
                 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
-                return Immediate(0U);
+                return Immediate(0x00ff'0000U);
             case SystemVariable::WscaleFactorXY:
                 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
                 return Immediate(0U);
@@ -109,6 +108,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
                 return Operation(OperationCode::WorkGroupIdY);
             case SystemVariable::CtaIdZ:
                 return Operation(OperationCode::WorkGroupIdZ);
+            case SystemVariable::EqMask:
+            case SystemVariable::LtMask:
+            case SystemVariable::LeMask:
+            case SystemVariable::GtMask:
+            case SystemVariable::GeMask:
+                uses_warps = true;
+                switch (instr.sys20) {
+                case SystemVariable::EqMask:
+                    return Operation(OperationCode::ThreadEqMask);
+                case SystemVariable::LtMask:
+                    return Operation(OperationCode::ThreadLtMask);
+                case SystemVariable::LeMask:
+                    return Operation(OperationCode::ThreadLeMask);
+                case SystemVariable::GtMask:
+                    return Operation(OperationCode::ThreadGtMask);
+                case SystemVariable::GeMask:
+                    return Operation(OperationCode::ThreadGeMask);
+                default:
+                    UNREACHABLE();
+                    return Immediate(0u);
+                }
             default:
                 UNIMPLEMENTED_MSG("Unhandled system move: {}",
                                   static_cast<u32>(instr.sys20.Value()));
@@ -272,10 +292,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
         break;
     }
+    case OpCode::Id::BAR: {
+        UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
+        bb.push_back(Operation(OperationCode::Barrier));
+        break;
+    }
     case OpCode::Id::MEMBAR: {
-        UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
         UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
-        bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+        const OperationCode type = [instr] {
+            switch (instr.membar.type) {
+            case Tegra::Shader::MembarType::CTA:
+                return OperationCode::MemoryBarrierGroup;
+            case Tegra::Shader::MembarType::GL:
+                return OperationCode::MemoryBarrierGlobal;
+            default:
+                UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
+                return OperationCode::MemoryBarrierGlobal;
+            }
+        }();
+        bb.push_back(Operation(type));
         break;
     }
     case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index 8d54cce34..6116c31aa 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <utility>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -10,20 +12,20 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
 namespace {
-constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7;
-}
+constexpr u64 NUM_CONDITION_CODES = 4;
+constexpr u64 NUM_PREDICATES = 7;
+} // namespace
 
 u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr);
-
-    const Node apply_mask = [&] {
+    Node apply_mask = [this, opcode, instr] {
         switch (opcode->get().GetId()) {
         case OpCode::Id::R2P_IMM:
         case OpCode::Id::P2R_IMM:
@@ -34,39 +36,43 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
         }
     }();
 
-    const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
+    const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
+
+    const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
+    const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
+    const auto get_entry = [this, cc](u64 entry) {
+        return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
+    };
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::R2P_IMM: {
-        const Node mask = GetRegister(instr.gpr8);
+        Node mask = GetRegister(instr.gpr8);
 
-        for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
-            const auto shift = static_cast<u32>(pred);
+        for (u64 entry = 0; entry < num_entries; ++entry) {
+            const u32 shift = static_cast<u32>(entry);
 
-            const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
-            const Node condition =
-                Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
+            Node apply = BitfieldExtract(apply_mask, shift, 1);
+            Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
 
-            const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
-            const Node value =
-                Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
+            Node compare = BitfieldExtract(mask, offset + shift, 1);
+            Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
 
-            const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
-            bb.push_back(Conditional(condition, {code}));
+            Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
+            bb.push_back(Conditional(condition, {move(code)}));
         }
         break;
     }
     case OpCode::Id::P2R_IMM: {
         Node value = Immediate(0);
-        for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
-            Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred),
+        for (u64 entry = 0; entry < num_entries; ++entry) {
+            Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
                                  Immediate(0));
-            value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit));
+            value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
         }
-        value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask);
-        value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8);
+        value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
+        value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
 
-        SetRegister(bb, instr.gpr0, std::move(value));
+        SetRegister(bb, instr.gpr0, move(value));
         break;
     }
     default:
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 3b391d3e6..d4ffa8014 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -23,7 +23,6 @@ Node IsFull(Node shift) {
 }
 
 Node Shift(OperationCode opcode, Node value, Node shift) {
-    Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32));
     Node shifted = Operation(opcode, move(value), shift);
     return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
 }
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 6c4a1358b..02fdccd86 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -139,15 +139,15 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
         const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
 
-        const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};
-        const Sampler& sampler = *GetSampler(instr.sampler, info);
+        SamplerInfo info;
+        info.is_shadow = is_depth_compare;
+        const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
 
         Node4 values;
         for (u32 element = 0; element < values.size(); ++element) {
-            auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, depth_compare, aoffi,   {}, {},
-                             {},      {}, component,     element, {}};
-            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
+            MetaTexture meta{*sampler, {}, depth_compare, aoffi,   {}, {},
+                             {},       {}, component,     element, {}};
+            values[element] = Operation(OperationCode::TextureGather, meta, coords);
         }
 
         if (instr.tld4s.fp16_flag) {
@@ -165,19 +165,20 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                              "AOFFI is not implemented");
 
         const bool is_array = instr.txd.is_array != 0;
-        u64 base_reg = instr.gpr8.Value();
         const auto derivate_reg = instr.gpr20.Value();
         const auto texture_type = instr.txd.texture_type.Value();
         const auto coord_count = GetCoordCount(texture_type);
-        Node index_var{};
-        const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})
-                        : GetSampler(instr.sampler, {{texture_type, is_array, false}});
+        u64 base_reg = instr.gpr8.Value();
+        Node index_var;
+        SamplerInfo info;
+        info.type = texture_type;
+        info.is_array = is_array;
+        const std::optional<Sampler> sampler = is_bindless
+                                                   ? GetBindlessSampler(base_reg, info, index_var)
+                                                   : GetSampler(instr.sampler, info);
         Node4 values;
-        if (sampler == nullptr) {
-            for (u32 element = 0; element < values.size(); ++element) {
-                values[element] = Immediate(0);
-            }
+        if (!sampler) {
+            std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
             WriteTexInstructionFloat(bb, instr, values);
             break;
         }
@@ -215,14 +216,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         is_bindless = true;
         [[fallthrough]];
     case OpCode::Id::TXQ: {
-        // TODO: The new commits on the texture refactor, change the way samplers work.
-        // Sadly, not all texture instructions specify the type of texture their sampler
-        // uses. This must be fixed at a later instance.
-        Node index_var{};
-        const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler);
-
-        if (sampler == nullptr) {
+        Node index_var;
+        const std::optional<Sampler> sampler = is_bindless
+                                                   ? GetBindlessSampler(instr.gpr8, {}, index_var)
+                                                   : GetSampler(instr.sampler, {});
+
+        if (!sampler) {
             u32 indexer = 0;
             for (u32 element = 0; element < 4; ++element) {
                 if (!instr.txq.IsComponentEnabled(element)) {
@@ -268,13 +267,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                              "NDV is not implemented");
 
-        auto texture_type = instr.tmml.texture_type.Value();
+        const auto texture_type = instr.tmml.texture_type.Value();
         const bool is_array = instr.tmml.array != 0;
-        Node index_var{};
-        const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);
-
-        if (sampler == nullptr) {
+        SamplerInfo info;
+        info.type = texture_type;
+        info.is_array = is_array;
+        Node index_var;
+        const std::optional<Sampler> sampler =
+            is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
+                        : GetSampler(instr.sampler, info);
+
+        if (!sampler) {
             u32 indexer = 0;
             for (u32 element = 0; element < 2; ++element) {
                 if (!instr.tmml.IsComponentEnabled(element)) {
@@ -289,34 +292,36 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             break;
         }
 
-        std::vector<Node> coords;
-
-        // TODO: Add coordinates for different samplers once other texture types are implemented.
-        switch (texture_type) {
-        case TextureType::Texture1D:
-            coords.push_back(GetRegister(instr.gpr8));
-            break;
-        case TextureType::Texture2D:
-            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
-            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
+        const u64 base_index = is_array ? 1 : 0;
+        const u64 num_components = [texture_type] {
+            switch (texture_type) {
+            case TextureType::Texture1D:
+                return 1;
+            case TextureType::Texture2D:
+                return 2;
+            case TextureType::TextureCube:
+                return 3;
+            default:
+                UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type));
+                return 2;
+            }
+        }();
+        // TODO: What's the array component used for?
 
-            // Fallback to interpreting as a 2D texture for now
-            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
-            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
-            texture_type = TextureType::Texture2D;
+        std::vector<Node> coords;
+        coords.reserve(num_components);
+        for (u64 component = 0; component < num_components; ++component) {
+            coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
         }
+
         u32 indexer = 0;
         for (u32 element = 0; element < 2; ++element) {
             if (!instr.tmml.IsComponentEnabled(element)) {
                 continue;
             }
-            auto params = coords;
             MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
-            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
-            SetTemporary(bb, indexer++, value);
+            Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
+            SetTemporary(bb, indexer++, std::move(value));
         }
         for (u32 i = 0; i < indexer; ++i) {
             SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
@@ -355,98 +360,122 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset,
-                                               std::optional<u32> buffer) {
-    if (sampler_info) {
-        return *sampler_info;
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
+    SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
+    if (info.IsComplete()) {
+        return info;
     }
-    const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
-                                : registry.ObtainBoundSampler(offset);
     if (!sampler) {
         LOG_WARNING(HW_GPU, "Unknown sampler info");
-        return SamplerInfo{TextureType::Texture2D, false, false, false};
-    }
-    return SamplerInfo{sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0,
-                       sampler->is_buffer != 0};
+        info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
+        info.is_array = info.is_array.value_or(false);
+        info.is_shadow = info.is_shadow.value_or(false);
+        info.is_buffer = info.is_buffer.value_or(false);
+        return info;
+    }
+    info.type = info.type.value_or(sampler->texture_type);
+    info.is_array = info.is_array.value_or(sampler->is_array != 0);
+    info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
+    info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
+    return info;
 }
 
-const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
-                                    std::optional<SamplerInfo> sampler_info) {
-    const auto offset = static_cast<u32>(sampler.index.Value());
-    const auto info = GetSamplerInfo(sampler_info, offset);
+std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
+                                            SamplerInfo sampler_info) {
+    const u32 offset = static_cast<u32>(sampler.index.Value());
+    const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
 
     // If this sampler has already been used, return the existing mapping.
-    const auto it =
-        std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
+    const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
+                                 [offset](const Sampler& entry) { return entry.offset == offset; });
     if (it != used_samplers.end()) {
-        ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
-               it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
-        return &*it;
+        ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
+               it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+        return *it;
     }
 
     // Otherwise create a new mapping for this sampler
     const auto next_index = static_cast<u32>(used_samplers.size());
-    return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
-                                       info.is_buffer, false);
+    return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
+                                      *info.is_shadow, *info.is_buffer, false);
 }
 
-const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
-                                            std::optional<SamplerInfo> sampler_info) {
+std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
+                                                    Node& index_var) {
     const Node sampler_register = GetRegister(reg);
     const auto [base_node, tracked_sampler_info] =
         TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT(base_node != nullptr);
-    if (base_node == nullptr) {
-        return nullptr;
+    if (!base_node) {
+        UNREACHABLE();
+        return std::nullopt;
     }
 
-    if (const auto bindless_sampler_info =
-            std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
-        const u32 buffer = bindless_sampler_info->GetIndex();
-        const u32 offset = bindless_sampler_info->GetOffset();
-        const auto info = GetSamplerInfo(sampler_info, offset, buffer);
+    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+        const u32 buffer = sampler_info->index;
+        const u32 offset = sampler_info->offset;
+        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
 
         // If this sampler has already been used, return the existing mapping.
-        const auto it =
-            std::find_if(used_samplers.begin(), used_samplers.end(),
-                         [buffer = buffer, offset = offset](const Sampler& entry) {
-                             return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
-                         });
+        const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
+                                     [buffer, offset](const Sampler& entry) {
+                                         return entry.buffer == buffer && entry.offset == offset;
+                                     });
         if (it != used_samplers.end()) {
-            ASSERT(it->IsBindless() && it->GetType() == info.type &&
-                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow);
-            return &*it;
+            ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow);
+            return *it;
         }
 
         // Otherwise create a new mapping for this sampler
         const auto next_index = static_cast<u32>(used_samplers.size());
-        return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
-                                           info.is_shadow, info.is_buffer, false);
-    } else if (const auto array_sampler_info =
-                   std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
-        const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
-        index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
-        const auto info = GetSamplerInfo(sampler_info, base_offset);
+        return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer, false);
+    }
+    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
+        const std::pair indices = sampler_info->indices;
+        const std::pair offsets = sampler_info->offsets;
+        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
+
+        // Try to use an already created sampler if it exists
+        const auto it = std::find_if(
+            used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
+                return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+                       indices == std::pair{entry.buffer, entry.secondary_buffer};
+            });
+        if (it != used_samplers.end()) {
+            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+            return *it;
+        }
+
+        // Otherwise create a new mapping for this sampler
+        const u32 next_index = static_cast<u32>(used_samplers.size());
+        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer);
+    }
+    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+        const u32 base_offset = sampler_info->base_offset / 4;
+        index_var = GetCustomVariable(sampler_info->bindless_var);
+        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
 
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(
             used_samplers.begin(), used_samplers.end(),
-            [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; });
+            [base_offset](const Sampler& entry) { return entry.offset == base_offset; });
         if (it != used_samplers.end()) {
-            ASSERT(!it->IsBindless() && it->GetType() == info.type &&
-                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow &&
-                   it->IsBuffer() == info.is_buffer && it->IsIndexed());
-            return &*it;
+            ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
+                   it->is_indexed);
+            return *it;
         }
 
         uses_indexed_samplers = true;
         // Otherwise create a new mapping for this sampler
         const auto next_index = static_cast<u32>(used_samplers.size());
-        return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array,
-                                           info.is_shadow, info.is_buffer, true);
+        return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer, true);
     }
-    return nullptr;
+    return std::nullopt;
 }
 
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -527,14 +556,19 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
     const bool is_shadow = depth_compare != nullptr;
     const bool is_bindless = bindless_reg.has_value();
 
-    UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow);
     ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
                "Illegal texture type");
 
-    const SamplerInfo info{texture_type, is_array, is_shadow, false};
+    SamplerInfo info;
+    info.type = texture_type;
+    info.is_array = is_array;
+    info.is_shadow = is_shadow;
+    info.is_buffer = false;
+
     Node index_var;
-    const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info)
-                                         : GetSampler(instr.sampler, info);
+    const std::optional<Sampler> sampler = is_bindless
+                                               ? GetBindlessSampler(*bindless_reg, info, index_var)
+                                               : GetSampler(instr.sampler, info);
     if (!sampler) {
         return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
     }
@@ -593,8 +627,9 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
         ++parameter_register;
     }
 
-    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
-        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
+                                                              lod_bias_enabled, 4, 5);
+    const auto coord_count = std::get<0>(coord_counts);
     // If enabled arrays index is always stored in the gpr8 field
     const u64 array_register = instr.gpr8.Value();
     // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
@@ -632,8 +667,10 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
     const bool lod_bias_enabled =
         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
 
-    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
-        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
+                                                              lod_bias_enabled, 4, 4);
+    const auto coord_count = std::get<0>(coord_counts);
+
     // If enabled arrays index is always stored in the gpr8 field
     const u64 array_register = instr.gpr8.Value();
     // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
@@ -682,12 +719,17 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
 
     u64 parameter_register = instr.gpr20.Value();
 
-    const SamplerInfo info{texture_type, is_array, depth_compare, false};
-    Node index_var{};
-    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info)
-                                         : GetSampler(instr.sampler, info);
+    SamplerInfo info;
+    info.type = texture_type;
+    info.is_array = is_array;
+    info.is_shadow = depth_compare;
+
+    Node index_var;
+    const std::optional<Sampler> sampler =
+        is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
+                    : GetSampler(instr.sampler, info);
     Node4 values;
-    if (sampler == nullptr) {
+    if (!sampler) {
         for (u32 element = 0; element < values.size(); ++element) {
             values[element] = Immediate(0);
         }
@@ -723,7 +765,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
 
 Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     const auto texture_type{instr.tld.texture_type};
-    const bool is_array{instr.tld.is_array};
+    const bool is_array{instr.tld.is_array != 0};
     const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
     const std::size_t coord_count{GetCoordCount(texture_type)};
 
@@ -742,12 +784,12 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
 
-    const auto& sampler = *GetSampler(instr.sampler);
+    const std::optional<Sampler> sampler = GetSampler(instr.sampler, {});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
+        MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
     }
 
@@ -755,7 +797,11 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
 }
 
 Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
-    const Sampler& sampler = *GetSampler(instr.sampler);
+    SamplerInfo info;
+    info.type = texture_type;
+    info.is_array = is_array;
+    info.is_shadow = false;
+    const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
 
     const std::size_t type_coord_count = GetCoordCount(texture_type);
     const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
@@ -783,7 +829,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
+        MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
     }
     return values;
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 64ba60ea2..1c0957277 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
-                               Tegra::Shader::VideoType type, u64 byte_height) {
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
+                               u64 byte_height) {
     if (!is_chunk) {
         return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
     }
-    const Node zero = Immediate(0);
 
     switch (type) {
-    case Tegra::Shader::VideoType::Size16_Low:
+    case VideoType::Size16_Low:
         return BitfieldExtract(op, 0, 16);
-    case Tegra::Shader::VideoType::Size16_High:
+    case VideoType::Size16_High:
         return BitfieldExtract(op, 16, 16);
-    case Tegra::Shader::VideoType::Size32:
+    case VideoType::Size32:
         // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
         // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
         UNIMPLEMENTED();
-        return zero;
-    case Tegra::Shader::VideoType::Invalid:
+        return Immediate(0);
+    case VideoType::Invalid:
         UNREACHABLE_MSG("Invalid instruction encoding");
-        return zero;
+        return Immediate(0);
     default:
         UNREACHABLE();
-        return zero;
+        return Immediate(0);
     }
 }
 
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 6191ffba1..233b8fa42 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -81,35 +81,36 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
     SetTemporary(bb, 0, product);
     product = GetTemporary(0);
 
-    const Node original_c = op_c;
+    Node original_c = op_c;
     const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
-    op_c = [&]() {
+    op_c = [&] {
         switch (set_mode) {
         case Tegra::Shader::XmadMode::None:
             return original_c;
         case Tegra::Shader::XmadMode::CLo:
-            return BitfieldExtract(original_c, 0, 16);
+            return BitfieldExtract(std::move(original_c), 0, 16);
         case Tegra::Shader::XmadMode::CHi:
-            return BitfieldExtract(original_c, 16, 16);
+            return BitfieldExtract(std::move(original_c), 16, 16);
         case Tegra::Shader::XmadMode::CBcc: {
-            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
-                                                   original_b, Immediate(16));
-            return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
+            Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+                                             original_b, Immediate(16));
+            return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
+                                   std::move(shifted_b));
         }
         case Tegra::Shader::XmadMode::CSfu: {
-            const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
-                                                              op_a, Immediate(0));
-            const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
-                                                              op_b, Immediate(0));
+            const Node comp_a =
+                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
+            const Node comp_b =
+                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
             const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
 
             const Node comp_minus_a = GetPredicateComparisonInteger(
-                PredCondition::NotEqual, is_signed_a,
+                PredCondition::NE, is_signed_a,
                 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
                                 Immediate(0x80000000)),
                 Immediate(0));
             const Node comp_minus_b = GetPredicateComparisonInteger(
-                PredCondition::NotEqual, is_signed_b,
+                PredCondition::NE, is_signed_b,
                 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
                                 Immediate(0x80000000)),
                 Immediate(0));
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
new file mode 100644
index 000000000..e18ccba8e
--- /dev/null
+++ b/src/video_core/shader/memory_util.cpp
@@ -0,0 +1,76 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstddef>
+
+#include <boost/container_hash/hash.hpp>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader/memory_util.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
+                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
+    const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+    return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
+}
+
+bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset;
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
+    // This is the encoded version of BRA that jumps to itself. All Nvidia
+    // shaders end with one.
+    static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
+    static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
+
+    const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+    std::size_t offset = start_offset;
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+            if (instruction == 0) {
+                break;
+            }
+        }
+        ++offset;
+    }
+    // The last instruction is included in the program size
+    return std::min(offset + 1, program.size());
+}
+
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute) {
+    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
+    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
+    code.resize(CalculateProgramSize(code, is_compute));
+    return code;
+}
+
+u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
+                        const ProgramCode& code_b) {
+    size_t unique_identifier = boost::hash_value(code);
+    if (is_a) {
+        // VertexA programs include two programs
+        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
+    }
+    return static_cast<u64>(unique_identifier);
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
new file mode 100644
index 000000000..4624d38e6
--- /dev/null
+++ b/src/video_core/shader/memory_util.h
@@ -0,0 +1,43 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCommon::Shader {
+
+using ProgramCode = std::vector<u64>;
+
+constexpr u32 STAGE_MAIN_OFFSET = 10;
+constexpr u32 KERNEL_MAIN_OFFSET = 0;
+
+/// Gets the address for the specified shader stage program
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
+                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
+
+/// Gets if the current instruction offset is a scheduler instruction
+bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
+
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
+
+/// Gets the shader program code from memory for the specified address
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute);
+
+/// Hashes one (or two) program streams
+u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
+                        const ProgramCode& code_b = {});
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 3eee961f5..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -110,13 +110,20 @@ enum class OperationCode {
     LogicalPick2,  /// (bool2 pair, uint index) -> bool
     LogicalAnd2,   /// (bool2 a) -> bool
 
-    LogicalFLessThan,     /// (float a, float b) -> bool
-    LogicalFEqual,        /// (float a, float b) -> bool
-    LogicalFLessEqual,    /// (float a, float b) -> bool
-    LogicalFGreaterThan,  /// (float a, float b) -> bool
-    LogicalFNotEqual,     /// (float a, float b) -> bool
-    LogicalFGreaterEqual, /// (float a, float b) -> bool
-    LogicalFIsNan,        /// (float a) -> bool
+    LogicalFOrdLessThan,       /// (float a, float b) -> bool
+    LogicalFOrdEqual,          /// (float a, float b) -> bool
+    LogicalFOrdLessEqual,      /// (float a, float b) -> bool
+    LogicalFOrdGreaterThan,    /// (float a, float b) -> bool
+    LogicalFOrdNotEqual,       /// (float a, float b) -> bool
+    LogicalFOrdGreaterEqual,   /// (float a, float b) -> bool
+    LogicalFOrdered,           /// (float a, float b) -> bool
+    LogicalFUnordered,         /// (float a, float b) -> bool
+    LogicalFUnordLessThan,     /// (float a, float b) -> bool
+    LogicalFUnordEqual,        /// (float a, float b) -> bool
+    LogicalFUnordLessEqual,    /// (float a, float b) -> bool
+    LogicalFUnordGreaterThan,  /// (float a, float b) -> bool
+    LogicalFUnordNotEqual,     /// (float a, float b) -> bool
+    LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
 
     LogicalILessThan,     /// (int a, int b) -> bool
     LogicalIEqual,        /// (int a, int b) -> bool
@@ -132,6 +139,8 @@ enum class OperationCode {
     LogicalUNotEqual,     /// (uint a, uint b) -> bool
     LogicalUGreaterEqual, /// (uint a, uint b) -> bool
 
+    LogicalAddCarry, /// (uint a, uint b) -> bool
+
     Logical2HLessThan,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
     Logical2HEqual,               /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
     Logical2HLessEqual,           /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
@@ -217,9 +226,16 @@ enum class OperationCode {
     VoteEqual,    /// (bool) -> bool
 
     ThreadId,       /// () -> uint
+    ThreadEqMask,   /// () -> uint
+    ThreadGeMask,   /// () -> uint
+    ThreadGtMask,   /// () -> uint
+    ThreadLeMask,   /// () -> uint
+    ThreadLtMask,   /// () -> uint
     ShuffleIndexed, /// (uint value, uint index) -> uint
 
-    MemoryBarrierGL, /// () -> void
+    Barrier,             /// () -> void
+    MemoryBarrierGroup,  /// () -> void
+    MemoryBarrierGlobal, /// () -> void
 
     Amount,
 };
@@ -259,133 +275,76 @@ using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
 
-class BindlessSamplerNode;
-class ArraySamplerNode;
+struct ArraySamplerNode;
+struct BindlessSamplerNode;
+struct SeparateSamplerNode;
 
-using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
 using TrackSampler = std::shared_ptr<TrackSamplerData>;
 
-class Sampler {
-public:
-    /// This constructor is for bound samplers
+struct Sampler {
+    /// Bound samplers constructor
     constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
                                bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
           is_buffer{is_buffer}, is_indexed{is_indexed} {}
 
-    /// This constructor is for bindless samplers
+    /// Separate sampler constructor
+    constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
+                               bool is_buffer)
+        : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
+          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
+          is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
+
+    /// Bindless samplers constructor
     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
                                bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
           is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
 
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetOffset() const {
-        return offset;
-    }
-
-    constexpr u32 GetBuffer() const {
-        return buffer;
-    }
-
-    constexpr Tegra::Shader::TextureType GetType() const {
-        return type;
-    }
-
-    constexpr bool IsArray() const {
-        return is_array;
-    }
-
-    constexpr bool IsShadow() const {
-        return is_shadow;
-    }
-
-    constexpr bool IsBuffer() const {
-        return is_buffer;
-    }
-
-    constexpr bool IsBindless() const {
-        return is_bindless;
-    }
-
-    constexpr bool IsIndexed() const {
-        return is_indexed;
-    }
-
-    constexpr u32 Size() const {
-        return size;
-    }
-
-    constexpr void SetSize(u32 new_size) {
-        size = new_size;
-    }
-
-private:
-    u32 index{};  ///< Emulated index given for the this sampler.
-    u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
-    u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
-    u32 size{1};  ///< Size of the sampler.
+    u32 index = 0;            ///< Emulated index given for the this sampler.
+    u32 offset = 0;           ///< Offset in the const buffer from where the sampler is being read.
+    u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
+    u32 buffer = 0;           ///< Buffer where the bindless sampler is read.
+    u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
+    u32 size = 1;             ///< Size of the sampler.
 
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
-    bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
-    bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
-    bool is_buffer{};   ///< Whether the texture is a texture buffer without sampler.
-    bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
-    bool is_indexed{};  ///< Whether this sampler is an indexed array of textures.
+    bool is_array = false;     ///< Whether the texture is being sampled as an array texture or not.
+    bool is_shadow = false;    ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_buffer = false;    ///< Whether the texture is a texture buffer without sampler.
+    bool is_bindless = false;  ///< Whether this sampler belongs to a bindless texture or not.
+    bool is_indexed = false;   ///< Whether this sampler is an indexed array of textures.
+    bool is_separated = false; ///< Whether the image and sampler is separated or not.
 };
 
 /// Represents a tracked bindless sampler into a direct const buffer
-class ArraySamplerNode final {
-public:
-    explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
-        : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
-
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetBaseOffset() const {
-        return base_offset;
-    }
-
-    constexpr u32 GetIndexVar() const {
-        return bindless_var;
-    }
-
-private:
+struct ArraySamplerNode {
     u32 index;
     u32 base_offset;
     u32 bindless_var;
 };
 
-/// Represents a tracked bindless sampler into a direct const buffer
-class BindlessSamplerNode final {
-public:
-    explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
-
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetOffset() const {
-        return offset;
-    }
+/// Represents a tracked separate sampler image pair that was folded statically
+struct SeparateSamplerNode {
+    std::pair<u32, u32> indices;
+    std::pair<u32, u32> offsets;
+};
 
-private:
+/// Represents a tracked bindless sampler into a direct const buffer
+struct BindlessSamplerNode {
     u32 index;
     u32 offset;
 };
 
-class Image final {
+struct Image {
 public:
-    /// This constructor is for bound images
+    /// Bound images constructor
     constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
         : index{index}, offset{offset}, type{type} {}
 
-    /// This constructor is for bindless samplers
+    /// Bindless samplers constructor
     constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
 
@@ -403,53 +362,20 @@ public:
         is_atomic = true;
     }
 
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetOffset() const {
-        return offset;
-    }
-
-    constexpr u32 GetBuffer() const {
-        return buffer;
-    }
-
-    constexpr Tegra::Shader::ImageType GetType() const {
-        return type;
-    }
-
-    constexpr bool IsBindless() const {
-        return is_bindless;
-    }
-
-    constexpr bool IsWritten() const {
-        return is_written;
-    }
-
-    constexpr bool IsRead() const {
-        return is_read;
-    }
-
-    constexpr bool IsAtomic() const {
-        return is_atomic;
-    }
-
-private:
-    u32 index{};
-    u32 offset{};
-    u32 buffer{};
+    u32 index = 0;
+    u32 offset = 0;
+    u32 buffer = 0;
 
     Tegra::Shader::ImageType type{};
-    bool is_bindless{};
-    bool is_written{};
-    bool is_read{};
-    bool is_atomic{};
+    bool is_bindless = false;
+    bool is_written = false;
+    bool is_read = false;
+    bool is_atomic = false;
 };
 
 struct GlobalMemoryBase {
-    u32 cbuf_index{};
-    u32 cbuf_offset{};
+    u32 cbuf_index = 0;
+    u32 cbuf_offset = 0;
 
     bool operator<(const GlobalMemoryBase& rhs) const {
         return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
@@ -463,7 +389,7 @@ struct MetaArithmetic {
 
 /// Parameters describing a texture sampler
 struct MetaTexture {
-    const Sampler& sampler;
+    Sampler sampler;
     Node array;
     Node depth_compare;
     std::vector<Node> aoffi;
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
 template <typename T, typename... Args>
 TrackSampler MakeTrackSampler(Args&&... args) {
     static_assert(std::is_convertible_v<T, TrackSamplerData>);
-    return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+    return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
 }
 
 template <typename... Args>
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..148d91fcb 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -24,44 +24,45 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac
     if (shader_stage == ShaderType::Compute) {
         return {};
     }
-    auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
-
-    GraphicsInfo info;
-    info.tfb_layouts = graphics.regs.tfb_layouts;
-    info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
-    info.primitive_topology = graphics.regs.draw.topology;
-    info.tessellation_primitive = graphics.regs.tess_mode.prim;
-    info.tessellation_spacing = graphics.regs.tess_mode.spacing;
-    info.tfb_enabled = graphics.regs.tfb_enabled;
-    info.tessellation_clockwise = graphics.regs.tess_mode.cw;
-    return info;
+
+    auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+    return {
+        .tfb_layouts = graphics.regs.tfb_layouts,
+        .tfb_varying_locs = graphics.regs.tfb_varying_locs,
+        .primitive_topology = graphics.regs.draw.topology,
+        .tessellation_primitive = graphics.regs.tess_mode.prim,
+        .tessellation_spacing = graphics.regs.tess_mode.spacing,
+        .tfb_enabled = graphics.regs.tfb_enabled != 0,
+        .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
+    };
 }
 
 ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
     if (shader_stage != ShaderType::Compute) {
         return {};
     }
-    auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+
+    auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
     const auto& launch = compute.launch_description;
 
-    ComputeInfo info;
-    info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
-    info.local_memory_size_in_words = launch.local_pos_alloc;
-    info.shared_memory_size_in_words = launch.shared_alloc;
-    return info;
+    return {
+        .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
+        .shared_memory_size_in_words = launch.shared_alloc,
+        .local_memory_size_in_words = launch.local_pos_alloc,
+    };
 }
 
 } // Anonymous namespace
 
-Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
     : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
       bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
 
-Registry::Registry(Tegra::Engines::ShaderType shader_stage,
-                   Tegra::Engines::ConstBufferEngineInterface& engine)
-    : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
-      graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
-                                                                 shader_stage, engine)} {}
+Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
+    : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
+      graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
+                                                                  shader_stage, engine_)} {}
 
 Registry::~Registry() = default;
 
@@ -93,8 +94,27 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
     return value;
 }
 
-std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
-                                                                                 u32 offset) {
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
+    std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
+    SeparateSamplerKey key;
+    key.buffers = buffers;
+    key.offsets = offsets;
+    const auto iter = separate_samplers.find(key);
+    if (iter != separate_samplers.end()) {
+        return iter->second;
+    }
+    if (!engine) {
+        return std::nullopt;
+    }
+
+    const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
+    const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
+    const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
+    separate_samplers.emplace(key, value);
+    return value;
+}
+
+std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
     const std::pair key = {buffer, offset};
     const auto iter = bindless_samplers.find(key);
     if (iter != bindless_samplers.end()) {
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..4bebefdde 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
 
 namespace VideoCommon::Shader {
 
+struct SeparateSamplerKey {
+    std::pair<u32, u32> buffers;
+    std::pair<u32, u32> offsets;
+};
+
+} // namespace VideoCommon::Shader
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::Shader::SeparateSamplerKey> {
+    std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
+        return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
+                                key.offsets.second);
+    }
+};
+
+template <>
+struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
+    bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
+                    const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
+        return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
+    }
+};
+
+} // namespace std
+
+namespace VideoCommon::Shader {
+
 using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
 using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using SeparateSamplerMap =
+    std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
 using BindlessSamplerMap =
     std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
 
@@ -63,7 +94,7 @@ public:
     explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
 
     explicit Registry(Tegra::Engines::ShaderType shader_stage,
-                      Tegra::Engines::ConstBufferEngineInterface& engine);
+                      Tegra::Engines::ConstBufferEngineInterface& engine_);
 
     ~Registry();
 
@@ -73,6 +104,9 @@ public:
 
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
 
+    std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
+        std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
+
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
 
     /// Inserts a key.
@@ -128,6 +162,7 @@ private:
     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
     KeyMap keys;
     BoundSamplerMap bound_samplers;
+    SeparateSamplerMap separate_samplers;
     BindlessSamplerMap bindless_samplers;
     u32 bound_buffer;
     GraphicsInfo graphics_info;
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 8852c8a1b..29d794b34 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -10,6 +10,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
@@ -56,8 +57,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
     const auto index = static_cast<u32>(index_);
     const auto offset = static_cast<u32>(offset_);
 
-    const auto [entry, is_new] = used_cbufs.try_emplace(index);
-    entry->second.MarkAsUsed(offset);
+    used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset);
 
     return MakeNode<CbufNode>(index, Immediate(offset));
 }
@@ -66,8 +66,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
     const auto index = static_cast<u32>(index_);
     const auto offset = static_cast<u32>(offset_);
 
-    const auto [entry, is_new] = used_cbufs.try_emplace(index);
-    entry->second.MarkAsUsedIndirect();
+    used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect();
 
     Node final_offset = [&] {
         // Attempt to inline constant buffer without a variable offset. This is done to allow
@@ -113,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
 }
 
 Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
-    const Node node = MakeNode<InternalFlagNode>(flag);
+    Node node = MakeNode<InternalFlagNode>(flag);
     if (negated) {
-        return Operation(OperationCode::LogicalNegate, node);
+        return Operation(OperationCode::LogicalNegate, std::move(node));
     }
     return node;
 }
@@ -166,6 +165,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe
                                 std::move(value), Immediate(16));
         value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
                                 std::move(value), Immediate(16));
+        return value;
     case Register::Size::Word:
         // Default - do nothing
         return value;
@@ -244,56 +244,44 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
 }
 
 Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
+    if (condition == PredCondition::T) {
+        return GetPredicate(true);
+    } else if (condition == PredCondition::F) {
+        return GetPredicate(false);
+    }
+
     static constexpr std::array comparison_table{
-        std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
-        std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
-        std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
-        std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
-        std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
-        std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
-        std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
-        std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
-        std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
-        std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
-        std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
+        OperationCode(0),
+        OperationCode::LogicalFOrdLessThan,       // LT
+        OperationCode::LogicalFOrdEqual,          // EQ
+        OperationCode::LogicalFOrdLessEqual,      // LE
+        OperationCode::LogicalFOrdGreaterThan,    // GT
+        OperationCode::LogicalFOrdNotEqual,       // NE
+        OperationCode::LogicalFOrdGreaterEqual,   // GE
+        OperationCode::LogicalFOrdered,           // NUM
+        OperationCode::LogicalFUnordered,         // NAN
+        OperationCode::LogicalFUnordLessThan,     // LTU
+        OperationCode::LogicalFUnordEqual,        // EQU
+        OperationCode::LogicalFUnordLessEqual,    // LEU
+        OperationCode::LogicalFUnordGreaterThan,  // GTU
+        OperationCode::LogicalFUnordNotEqual,     // NEU
+        OperationCode::LogicalFUnordGreaterEqual, // GEU
     };
+    const std::size_t index = static_cast<std::size_t>(condition);
+    ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
 
-    const auto comparison =
-        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
-                     [condition](const auto entry) { return condition == entry.first; });
-    UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
-                         "Unknown predicate comparison operation");
-
-    Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
-
-    if (condition == PredCondition::LessThanWithNan ||
-        condition == PredCondition::NotEqualWithNan ||
-        condition == PredCondition::LessEqualWithNan ||
-        condition == PredCondition::GreaterThanWithNan ||
-        condition == PredCondition::GreaterEqualWithNan) {
-        predicate = Operation(OperationCode::LogicalOr, predicate,
-                              Operation(OperationCode::LogicalFIsNan, op_a));
-        predicate = Operation(OperationCode::LogicalOr, predicate,
-                              Operation(OperationCode::LogicalFIsNan, op_b));
-    }
-
-    return predicate;
+    return Operation(comparison_table[index], op_a, op_b);
 }
 
 Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                              Node op_b) {
     static constexpr std::array comparison_table{
-        std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
-        std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
-        std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
-        std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
-        std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
-        std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
-        std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
-        std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
-        std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
-        std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
-        std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
+        std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
+        std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
+        std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
+        std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
+        std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
+        std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
     };
 
     const auto comparison =
@@ -302,32 +290,24 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
     UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
                          "Unknown predicate comparison operation");
 
-    Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
-                                     std::move(op_b));
-
-    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
-                             condition == PredCondition::NotEqualWithNan ||
-                             condition == PredCondition::LessEqualWithNan ||
-                             condition == PredCondition::GreaterThanWithNan ||
-                             condition == PredCondition::GreaterEqualWithNan,
-                         "NaN comparisons for integers are not implemented");
-    return predicate;
+    return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
+                           std::move(op_b));
 }
 
 Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
                                           Node op_b) {
     static constexpr std::array comparison_table{
-        std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
-        std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
-        std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
-        std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
-        std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
-        std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
-        std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
-        std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
-        std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
-        std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
-        std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
+        std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
+        std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
+        std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
+        std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
+        std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
+        std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
+        std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
+        std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
+        std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
+        std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
+        std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
     };
 
     const auto comparison =
@@ -398,7 +378,7 @@ void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc
     if (!sets_cc) {
         return;
     }
-    Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
+    Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
     SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
     LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
 }
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index c6e7bdf50..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,6 +18,7 @@
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/registry.h"
 
@@ -25,16 +26,13 @@ namespace VideoCommon::Shader {
 
 struct ShaderBlock;
 
-using ProgramCode = std::vector<u64>;
-
 constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
 
-class ConstBuffer {
-public:
-    explicit ConstBuffer(u32 max_offset, bool is_indirect)
+struct ConstBuffer {
+    constexpr explicit ConstBuffer(u32 max_offset, bool is_indirect)
         : max_offset{max_offset}, is_indirect{is_indirect} {}
 
-    ConstBuffer() = default;
+    constexpr ConstBuffer() = default;
 
     void MarkAsUsed(u64 offset) {
         max_offset = std::max(max_offset, static_cast<u32>(offset));
@@ -57,8 +55,8 @@ public:
     }
 
 private:
-    u32 max_offset{};
-    bool is_indirect{};
+    u32 max_offset = 0;
+    bool is_indirect = false;
 };
 
 struct GlobalMemoryUsage {
@@ -192,10 +190,14 @@ private:
     friend class ASTDecoder;
 
     struct SamplerInfo {
-        Tegra::Shader::TextureType type;
-        bool is_array;
-        bool is_shadow;
-        bool is_buffer;
+        std::optional<Tegra::Shader::TextureType> type;
+        std::optional<bool> is_array;
+        std::optional<bool> is_shadow;
+        std::optional<bool> is_buffer;
+
+        constexpr bool IsComplete() const noexcept {
+            return type && is_array && is_shadow && is_buffer;
+        }
     };
 
     void Decode();
@@ -328,16 +330,15 @@ private:
     OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
 
     /// Queries the missing sampler info from the execution context.
-    SamplerInfo GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset,
-                               std::optional<u32> buffer = std::nullopt);
+    SamplerInfo GetSamplerInfo(SamplerInfo info,
+                               std::optional<Tegra::Engines::SamplerDescriptor> sampler);
 
-    /// Accesses a texture sampler
-    const Sampler* GetSampler(const Tegra::Shader::Sampler& sampler,
-                              std::optional<SamplerInfo> sampler_info = std::nullopt);
+    /// Accesses a texture sampler.
+    std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
 
     /// Accesses a texture sampler for a bindless texture.
-    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
-                                      std::optional<SamplerInfo> sampler_info = std::nullopt);
+    std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
+                                              Node& index_var);
 
     /// Accesses an image.
     Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -408,8 +409,14 @@ private:
 
     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
-    std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
-                                                        s64 cursor);
+    std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                       s64 cursor);
+
+    std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
+                                                             const OperationNode& operation,
+                                                             Node gpr, Node base_offset,
+                                                             Node tracked, const NodeBlock& code,
+                                                             s64 cursor);
 
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 10739b37d..6be3ea92b 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
 namespace VideoCommon::Shader {
 
 namespace {
+
 std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                    OperationCode operation_code) {
     for (; cursor >= 0; --cursor) {
@@ -27,8 +28,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
 
         if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
             const auto& conditional_code = conditional->GetCode();
-            auto [found, internal_cursor] = FindOperation(
+            auto result = FindOperation(
                 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
+            auto& found = result.first;
             if (found) {
                 return {std::move(found), cursor};
             }
@@ -62,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
     if (const auto operation = std::get_if<OperationNode>(&*node)) {
         operation->SetAmendIndex(amend_index);
         return true;
-    } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+    }
+    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
         conditional->SetAmendIndex(amend_index);
         return true;
     }
@@ -71,39 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
 
 } // Anonymous namespace
 
-std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
-                                                              s64 cursor) {
+std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                             s64 cursor) {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+        const u32 cbuf_index = cbuf->GetIndex();
+
         // Constant buffer found, test if it's an immediate
-        const auto offset = cbuf->GetOffset();
+        const auto& offset = cbuf->GetOffset();
         if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
-            auto track =
-                MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+            auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
             return {tracked, track};
-        } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
+        }
+        if (const auto operation = std::get_if<OperationNode>(&*offset)) {
             const u32 bound_buffer = registry.GetBoundBuffer();
-            if (bound_buffer != cbuf->GetIndex()) {
+            if (bound_buffer != cbuf_index) {
                 return {};
             }
-            const auto pair = DecoupleIndirectRead(*operation);
-            if (!pair) {
-                return {};
+            if (const std::optional pair = DecoupleIndirectRead(*operation)) {
+                auto [gpr, base_offset] = *pair;
+                return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
+                                                  code, cursor);
             }
-            auto [gpr, base_offset] = *pair;
-            const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
-            const auto& gpu_driver = registry.AccessGuestDriverProfile();
-            const u32 bindless_cv = NewCustomVariable();
-            const Node op =
-                Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
-
-            const Node cv_node = GetCustomVariable(bindless_cv);
-            Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
-            const std::size_t amend_index = DeclareAmend(amend_op);
-            AmendNodeCv(amend_index, code[cursor]);
-            // TODO Implement Bindless Index custom variable
-            auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
-                                                            offset_inm->GetValue(), bindless_cv);
-            return {tracked, track};
         }
         return {};
     }
@@ -120,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
         return TrackBindlessSampler(source, code, new_cursor);
     }
     if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
-        for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
-            if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
-                std::get<0>(found)) {
-                // Cbuf found in operand.
+        const OperationNode& op = *operation;
+
+        const OperationCode opcode = operation->GetCode();
+        if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
+            ASSERT(op.GetOperandsCount() == 2);
+            auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
+            auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
+            if (node_a && node_b) {
+                auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
+                                                                   std::pair{offset_a, offset_b});
+                return {tracked, std::move(track)};
+            }
+        }
+        std::size_t i = op.GetOperandsCount();
+        while (i--) {
+            if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
+                // Constant buffer found in operand.
                 return found;
             }
         }
@@ -137,11 +141,31 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
     return {};
 }
 
+std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
+    const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
+    const NodeBlock& code, s64 cursor) {
+    const auto offset_imm = std::get<ImmediateNode>(*base_offset);
+    const auto& gpu_driver = registry.AccessGuestDriverProfile();
+    const u32 bindless_cv = NewCustomVariable();
+    const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
+    Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
+
+    Node cv_node = GetCustomVariable(bindless_cv);
+    Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
+    const std::size_t amend_index = DeclareAmend(std::move(amend_op));
+    AmendNodeCv(amend_index, code[cursor]);
+
+    // TODO: Implement bindless index custom variable
+    auto track =
+        MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
+    return {tracked, track};
+}
+
 std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                                s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
         // Constant buffer found, test if it's an immediate
-        const auto offset = cbuf->GetOffset();
+        const auto& offset = cbuf->GetOffset();
         if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
             return {tracked, cbuf->GetIndex(), immediate->GetValue()};
         }
@@ -151,21 +175,13 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
         if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
             return {};
         }
-        s64 current_cursor = cursor;
-        while (current_cursor > 0) {
-            // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
-            // register that it uses as operand
-            const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1);
-            current_cursor = new_cursor;
-            if (!source) {
-                continue;
-            }
-            const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor);
-            if (base_address != nullptr) {
-                return {base_address, index, offset};
-            }
+        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+        // register that it uses as operand
+        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
+        if (!source) {
+            return {};
         }
-        return {};
+        return TrackCbuf(source, code, new_cursor);
     }
     if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
         for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
@@ -186,15 +202,15 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
 std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
     // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
     // that it uses as operand
-    const auto [found, found_cursor] =
-        TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
+    const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
+    const auto& found = result.first;
     if (!found) {
-        return {};
+        return std::nullopt;
     }
     if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
         return immediate->GetValue();
     }
-    return {};
+    return std::nullopt;
 }
 
 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,