Diffstat (limited to 'src')
63 files changed, 871 insertions, 545 deletions
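For reference, a minimal usage sketch of the Common::LeastRecentlyUsedCache template that this change introduces in src/common/lru_cache.h (see the new file below). The BufferLRUTraits struct, the LruUsageSketch function, and the object/tick values are illustrative placeholders, not part of the change:

#include "common/lru_cache.h"

// Illustrative traits: the cache is parameterized on the cached object type and
// on the tick type used to order entries from least- to most-recently used.
struct BufferLRUTraits {
    using ObjectType = u32; // e.g. a buffer id
    using TickType = u64;   // e.g. a frame counter
};

void LruUsageSketch() {
    Common::LeastRecentlyUsedCache<BufferLRUTraits> lru;
    const size_t id = lru.Insert(42u, 1); // start tracking object 42, last used at tick 1
    lru.Touch(id, 5);                     // mark it as used again at tick 5
    // Walk entries whose tick does not exceed the given value, oldest first.
    lru.ForEachItemBelow(10, [](u32 obj) {
        // evict or destroy obj here
    });
    lru.Free(id); // detach the entry and return its slot to the free list
}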
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 949384fd3..e40d117d6 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -18,6 +18,7 @@ #include "common/fs/fs_paths.h" #include "common/fs/path_util.h" #include "common/literals.h" +#include "common/thread.h" #include "common/logging/backend.h" #include "common/logging/log.h" diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index f055f0e11..42744c994 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -111,6 +111,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Service, NCM) \ SUB(Service, NFC) \ SUB(Service, NFP) \ + SUB(Service, NGCT) \ SUB(Service, NIFM) \ SUB(Service, NIM) \ SUB(Service, NPNS) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 7ad0334fc..ddf9d27ca 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -81,6 +81,7 @@ enum class Class : u8 { Service_NCM, ///< The NCM service Service_NFC, ///< The NFC (Near-field communication) service Service_NFP, ///< The NFP service + Service_NGCT, ///< The NGCT (No Good Content for Terra) service Service_NIFM, ///< The NIFM (Network interface) service Service_NIM, ///< The NIM service Service_NPNS, ///< The NPNS service diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 000000000..365488ba5 --- /dev/null +++ b/src/common/lru_cache.h @@ -0,0 +1,140 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2+ or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <deque> +#include <memory> +#include <type_traits> + +#include "common/common_types.h" + +namespace Common { + +template <class Traits> +class LeastRecentlyUsedCache { + using ObjectType = typename Traits::ObjectType; + using TickType = typename Traits::TickType; + + struct Item { + ObjectType obj; + TickType tick; + Item* next{}; + Item* prev{}; + }; + +public: + LeastRecentlyUsedCache() : first_item{}, last_item{} {} + ~LeastRecentlyUsedCache() = default; + + size_t Insert(ObjectType obj, TickType tick) { + const auto new_id = Build(); + auto& item = item_pool[new_id]; + item.obj = obj; + item.tick = tick; + Attach(item); + return new_id; + } + + void Touch(size_t id, TickType tick) { + auto& item = item_pool[id]; + if (item.tick >= tick) { + return; + } + item.tick = tick; + if (&item == last_item) { + return; + } + Detach(item); + Attach(item); + } + + void Free(size_t id) { + auto& item = item_pool[id]; + Detach(item); + item.prev = nullptr; + item.next = nullptr; + free_items.push_back(id); + } + + template <typename Func> + void ForEachItemBelow(TickType tick, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v<std::invoke_result<Func, ObjectType>, bool>; + Item* iterator = first_item; + while (iterator) { + if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) { + return; + } + Item* next = iterator->next; + if constexpr (RETURNS_BOOL) { + if (func(iterator->obj)) { + return; + } + } else { + func(iterator->obj); + } + iterator = next; + } + } + +private: + size_t Build() { + if (free_items.empty()) { + const size_t item_id = item_pool.size(); + auto& item = item_pool.emplace_back(); + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + const size_t item_id = free_items.front(); + free_items.pop_front(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return 
item_id; + } + + void Attach(Item& item) { + if (!first_item) { + first_item = &item; + } + if (!last_item) { + last_item = &item; + } else { + item.prev = last_item; + last_item->next = &item; + item.next = nullptr; + last_item = &item; + } + } + + void Detach(Item& item) { + if (item.prev) { + item.prev->next = item.next; + } + if (item.next) { + item.next->prev = item.prev; + } + if (&item == first_item) { + first_item = item.next; + if (first_item) { + first_item->prev = nullptr; + } + } + if (&item == last_item) { + last_item = item.prev; + if (last_item) { + last_item->next = nullptr; + } + } + } + + std::deque<Item> item_pool; + std::deque<size_t> free_items; + Item* first_item{}; + Item* last_item{}; +}; + +} // namespace Common diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 996315999..fd3b639cd 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -59,7 +59,6 @@ void LogSettings() { log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); - log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); log_setting("Audio_OutputEngine", values.sink_id.GetValue()); log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -143,7 +142,6 @@ void RestoreGlobalState(bool is_powered_on) { values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); - values.use_caches_gc.SetGlobal(true); values.bg_red.SetGlobal(true); values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 20769d310..ec4d381e8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -475,7 +475,6 @@ struct Values { ShaderBackend::SPIRV, "shader_backend"}; Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; - Setting<bool> use_caches_gc{false, "use_caches_gc"}; Setting<u8> bg_red{0, "bg_red"}; Setting<u8> bg_green{0, "bg_green"}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f5cf5c16a..87d47e2e5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -452,6 +452,8 @@ add_library(core STATIC hle/service/nfp/nfp.h hle/service/nfp/nfp_user.cpp hle/service/nfp/nfp_user.h + hle/service/ngct/ngct.cpp + hle/service/ngct/ngct.h hle/service/nifm/nifm.cpp hle/service/nifm/nifm.h hle/service/nim/nim.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 5893a86bf..ba4629993 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -507,6 +507,12 @@ const ARM_Interface& System::CurrentArmInterface() const { return impl->kernel.CurrentPhysicalCore().ArmInterface(); } +std::size_t System::CurrentCoreIndex() const { + std::size_t core = impl->kernel.GetCurrentHostThreadID(); + ASSERT(core < Core::Hardware::NUM_CPU_CORES); + return core; +} + Kernel::PhysicalCore& System::CurrentPhysicalCore() { return impl->kernel.CurrentPhysicalCore(); } diff --git a/src/core/core.h b/src/core/core.h index f9116ebb6..715ab88e7 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -205,6 +205,9 @@ public: /// Gets an ARM interface to the CPU core that is currently running [[nodiscard]] const ARM_Interface& CurrentArmInterface() const; + /// Gets 
the index of the currently running CPU core + [[nodiscard]] std::size_t CurrentCoreIndex() const; + /// Gets the physical core for the CPU core that is currently running [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore(); diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index de2e5563e..77efcabf0 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -118,18 +118,17 @@ void CpuManager::MultiCoreRunGuestLoop() { physical_core = &kernel.CurrentPhysicalCore(); } system.ExitDynarmicProfile(); - { - Kernel::KScopedDisableDispatch dd(kernel); - physical_core->ArmInterface().ClearExclusiveState(); - } + physical_core->ArmInterface().ClearExclusiveState(); + kernel.CurrentScheduler()->RescheduleCurrentCore(); } } void CpuManager::MultiCoreRunIdleThread() { auto& kernel = system.Kernel(); while (true) { - Kernel::KScopedDisableDispatch dd(kernel); - kernel.CurrentPhysicalCore().Idle(); + auto& physical_core = kernel.CurrentPhysicalCore(); + physical_core.Idle(); + kernel.CurrentScheduler()->RescheduleCurrentCore(); } } @@ -137,12 +136,12 @@ void CpuManager::MultiCoreRunSuspendThread() { auto& kernel = system.Kernel(); kernel.CurrentScheduler()->OnThreadStart(); while (true) { - auto core = kernel.CurrentPhysicalCoreIndex(); + auto core = kernel.GetCurrentHostThreadID(); auto& scheduler = *kernel.CurrentScheduler(); Kernel::KThread* current_thread = scheduler.GetCurrentThread(); Common::Fiber::YieldTo(current_thread->GetHostContext(), *core_data[core].host_context); ASSERT(scheduler.ContextSwitchPending()); - ASSERT(core == kernel.CurrentPhysicalCoreIndex()); + ASSERT(core == kernel.GetCurrentHostThreadID()); scheduler.RescheduleCurrentCore(); } } @@ -348,11 +347,15 @@ void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) { sc_sync_first_use = false; } - // Emulation was stopped - if (stop_token.stop_requested()) { + // Abort if emulation was killed before the session really starts + if (!system.IsPoweredOn()) { return; } + if (stop_token.stop_requested()) { + break; + } + auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); data.is_running = true; Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext()); diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 6771ef621..1b429bc1e 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -28,7 +28,7 @@ bool ReadFromUser(Core::System& system, s32* out, VAddr address) { bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 value) { auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + const auto current_core = system.CurrentCoreIndex(); // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable. // TODO(bunnei): We should call CanAccessAtomic(..) here. @@ -58,7 +58,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 new_value) { auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + const auto current_core = system.CurrentCoreIndex(); // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable. // TODO(bunnei): We should call CanAccessAtomic(..) here. 
diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h index 165b76747..e4fcdbc67 100644 --- a/src/core/hle/kernel/k_auto_object.h +++ b/src/core/hle/kernel/k_auto_object.h @@ -170,10 +170,6 @@ public: } } - const std::string& GetName() const { - return name; - } - private: void RegisterWithKernel(); void UnregisterWithKernel(); diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index 4174f35fd..ef14ad1d2 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -35,7 +35,7 @@ bool WriteToUser(Core::System& system, VAddr address, const u32* p) { bool UpdateLockAtomic(Core::System& system, u32* out, VAddr address, u32 if_zero, u32 new_orr_mask) { auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + const auto current_core = system.CurrentCoreIndex(); // Load the value from the address. const auto expected = monitor.ExclusiveRead32(current_core, address); diff --git a/src/core/hle/kernel/k_handle_table.cpp b/src/core/hle/kernel/k_handle_table.cpp index d720c2dda..6a420d5b0 100644 --- a/src/core/hle/kernel/k_handle_table.cpp +++ b/src/core/hle/kernel/k_handle_table.cpp @@ -13,7 +13,6 @@ ResultCode KHandleTable::Finalize() { // Get the table and clear our record of it. u16 saved_table_size = 0; { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); std::swap(m_table_size, saved_table_size); @@ -44,7 +43,6 @@ bool KHandleTable::Remove(Handle handle) { // Find the object and free the entry. KAutoObject* obj = nullptr; { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); if (this->IsValidHandle(handle)) { @@ -63,7 +61,6 @@ bool KHandleTable::Remove(Handle handle) { } ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Never exceed our capacity. @@ -86,7 +83,6 @@ ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) { } ResultCode KHandleTable::Reserve(Handle* out_handle) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Never exceed our capacity. @@ -97,7 +93,6 @@ ResultCode KHandleTable::Reserve(Handle* out_handle) { } void KHandleTable::Unreserve(Handle handle) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Unpack the handle. @@ -116,7 +111,6 @@ void KHandleTable::Unreserve(Handle handle) { } void KHandleTable::Register(Handle handle, KAutoObject* obj, u16 type) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Unpack the handle. diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h index 75dcec7df..2ff6aa160 100644 --- a/src/core/hle/kernel/k_handle_table.h +++ b/src/core/hle/kernel/k_handle_table.h @@ -69,7 +69,6 @@ public: template <typename T = KAutoObject> KScopedAutoObject<T> GetObjectWithoutPseudoHandle(Handle handle) const { // Lock and look up in table. - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); if constexpr (std::is_same_v<T, KAutoObject>) { @@ -124,7 +123,6 @@ public: size_t num_opened; { // Lock the table. - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); for (num_opened = 0; num_opened < num_handles; num_opened++) { // Get the current handle. 
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 3d7e6707e..8ead1a769 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -59,7 +59,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority thread->GetContext64().cpu_registers[0] = 0; thread->GetContext32().cpu_registers[1] = thread_handle; thread->GetContext64().cpu_registers[1] = thread_handle; - thread->DisableDispatch(); auto& kernel = system.Kernel(); // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index 6ddbae52c..6a7d80d03 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -376,18 +376,20 @@ void KScheduler::ClearSchedulerUpdateNeeded(KernelCore& kernel) { } void KScheduler::DisableScheduling(KernelCore& kernel) { - ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 0); - GetCurrentThreadPointer(kernel)->DisableDispatch(); + if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { + ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 0); + scheduler->GetCurrentThread()->DisableDispatch(); + } } void KScheduler::EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling) { - ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 1); - - if (GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() > 1) { - GetCurrentThreadPointer(kernel)->EnableDispatch(); - } else { - RescheduleCores(kernel, cores_needing_scheduling); + if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { + ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1); + if (scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1) { + scheduler->GetCurrentThread()->EnableDispatch(); + } } + RescheduleCores(kernel, cores_needing_scheduling); } u64 KScheduler::UpdateHighestPriorityThreads(KernelCore& kernel) { @@ -615,17 +617,13 @@ KScheduler::KScheduler(Core::System& system_, s32 core_id_) : system{system_}, c state.highest_priority_thread = nullptr; } -void KScheduler::Finalize() { +KScheduler::~KScheduler() { if (idle_thread) { idle_thread->Close(); idle_thread = nullptr; } } -KScheduler::~KScheduler() { - ASSERT(!idle_thread); -} - KThread* KScheduler::GetCurrentThread() const { if (auto result = current_thread.load(); result) { return result; @@ -644,12 +642,10 @@ void KScheduler::RescheduleCurrentCore() { if (phys_core.IsInterrupted()) { phys_core.ClearInterrupt(); } - guard.Lock(); if (state.needs_scheduling.load()) { Schedule(); } else { - GetCurrentThread()->EnableDispatch(); guard.Unlock(); } } @@ -659,33 +655,26 @@ void KScheduler::OnThreadStart() { } void KScheduler::Unload(KThread* thread) { - ASSERT(thread); - LOG_TRACE(Kernel, "core {}, unload thread {}", core_id, thread ? thread->GetName() : "nullptr"); - if (thread->IsCallingSvc()) { - thread->ClearIsCallingSvc(); - } - - auto& physical_core = system.Kernel().PhysicalCore(core_id); - if (!physical_core.IsInitialized()) { - return; - } - - Core::ARM_Interface& cpu_core = physical_core.ArmInterface(); - cpu_core.SaveContext(thread->GetContext32()); - cpu_core.SaveContext(thread->GetContext64()); - // Save the TPIDR_EL0 system register in case it was modified. 
- thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); - cpu_core.ClearExclusiveState(); - - if (!thread->IsTerminationRequested() && thread->GetActiveCore() == core_id) { - prev_thread = thread; - } else { - prev_thread = nullptr; + if (thread) { + if (thread->IsCallingSvc()) { + thread->ClearIsCallingSvc(); + } + if (!thread->IsTerminationRequested()) { + prev_thread = thread; + + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); + cpu_core.SaveContext(thread->GetContext32()); + cpu_core.SaveContext(thread->GetContext64()); + // Save the TPIDR_EL0 system register in case it was modified. + thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); + } else { + prev_thread = nullptr; + } + thread->context_guard.Unlock(); } - - thread->context_guard.Unlock(); } void KScheduler::Reload(KThread* thread) { @@ -694,6 +683,11 @@ void KScheduler::Reload(KThread* thread) { if (thread) { ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable."); + auto* const thread_owner_process = thread->GetOwnerProcess(); + if (thread_owner_process != nullptr) { + system.Kernel().MakeCurrentProcess(thread_owner_process); + } + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); cpu_core.LoadContext(thread->GetContext32()); cpu_core.LoadContext(thread->GetContext64()); @@ -711,7 +705,7 @@ void KScheduler::SwitchContextStep2() { } void KScheduler::ScheduleImpl() { - KThread* previous_thread = GetCurrentThread(); + KThread* previous_thread = current_thread.load(); KThread* next_thread = state.highest_priority_thread; state.needs_scheduling = false; @@ -723,15 +717,10 @@ void KScheduler::ScheduleImpl() { // If we're not actually switching thread, there's nothing to do. if (next_thread == current_thread.load()) { - previous_thread->EnableDispatch(); guard.Unlock(); return; } - if (next_thread->GetCurrentCore() != core_id) { - next_thread->SetCurrentCore(core_id); - } - current_thread.store(next_thread); KProcess* const previous_process = system.Kernel().CurrentProcess(); @@ -742,7 +731,11 @@ void KScheduler::ScheduleImpl() { Unload(previous_thread); std::shared_ptr<Common::Fiber>* old_context; - old_context = &previous_thread->GetHostContext(); + if (previous_thread != nullptr) { + old_context = &previous_thread->GetHostContext(); + } else { + old_context = &idle_thread->GetHostContext(); + } guard.Unlock(); Common::Fiber::YieldTo(*old_context, *switch_fiber); diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h index 516e0cdba..12cfae919 100644 --- a/src/core/hle/kernel/k_scheduler.h +++ b/src/core/hle/kernel/k_scheduler.h @@ -33,8 +33,6 @@ public: explicit KScheduler(Core::System& system_, s32 core_id_); ~KScheduler(); - void Finalize(); - /// Reschedules to the next available thread (call after current thread is suspended) void RescheduleCurrentCore(); diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 0f6808ade..9f1d3156b 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -14,7 +14,6 @@ #include "common/fiber.h" #include "common/logging/log.h" #include "common/scope_exit.h" -#include "common/settings.h" #include "common/thread_queue_list.h" #include "core/core.h" #include "core/cpu_manager.h" @@ -189,7 +188,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s // Setup the stack parameters. 
StackParameters& sp = GetStackParameters(); sp.cur_thread = this; - sp.disable_count = 0; + sp.disable_count = 1; SetInExceptionHandler(); // Set thread ID. @@ -216,10 +215,9 @@ ResultCode KThread::InitializeThread(KThread* thread, KThreadFunction func, uint // Initialize the thread. R_TRY(thread->Initialize(func, arg, user_stack_top, prio, core, owner, type)); - // Initialize emulation parameters. + // Initialize host context. thread->host_context = std::make_shared<Common::Fiber>(std::move(init_func), init_func_parameter); - thread->is_single_core = !Settings::values.use_multi_core.GetValue(); return ResultSuccess; } @@ -972,9 +970,6 @@ ResultCode KThread::Run() { // Set our state and finish. SetState(ThreadState::Runnable); - - DisableDispatch(); - return ResultSuccess; } } @@ -1059,16 +1054,4 @@ s32 GetCurrentCoreId(KernelCore& kernel) { return GetCurrentThread(kernel).GetCurrentCore(); } -KScopedDisableDispatch::~KScopedDisableDispatch() { - if (GetCurrentThread(kernel).GetDisableDispatchCount() <= 1) { - auto scheduler = kernel.CurrentScheduler(); - - if (scheduler) { - scheduler->RescheduleCurrentCore(); - } - } else { - GetCurrentThread(kernel).EnableDispatch(); - } -} - } // namespace Kernel diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index e4c4c877d..c77f44ad4 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -450,39 +450,16 @@ public: sleeping_queue = q; } - [[nodiscard]] bool IsKernelThread() const { - return GetActiveCore() == 3; - } - - [[nodiscard]] bool IsDispatchTrackingDisabled() const { - return is_single_core || IsKernelThread(); - } - [[nodiscard]] s32 GetDisableDispatchCount() const { - if (IsDispatchTrackingDisabled()) { - // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. - return 1; - } - return this->GetStackParameters().disable_count; } void DisableDispatch() { - if (IsDispatchTrackingDisabled()) { - // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. - return; - } - ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() >= 0); this->GetStackParameters().disable_count++; } void EnableDispatch() { - if (IsDispatchTrackingDisabled()) { - // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. 
- return; - } - ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() > 0); this->GetStackParameters().disable_count--; } @@ -731,7 +708,6 @@ private: // For emulation std::shared_ptr<Common::Fiber> host_context{}; - bool is_single_core{}; // For debugging std::vector<KSynchronizationObject*> wait_objects_for_debugging; @@ -776,16 +752,4 @@ public: } }; -class KScopedDisableDispatch { -public: - [[nodiscard]] explicit KScopedDisableDispatch(KernelCore& kernel_) : kernel{kernel_} { - GetCurrentThread(kernel).DisableDispatch(); - } - - ~KScopedDisableDispatch(); - -private: - KernelCore& kernel; -}; - } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 8fdab44e4..bea945301 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -85,9 +85,8 @@ struct KernelCore::Impl { } void InitializeCores() { - for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { - cores[core_id].Initialize(current_process->Is64BitProcess()); - system.Memory().SetCurrentPageTable(*current_process, core_id); + for (auto& core : cores) { + core.Initialize(current_process->Is64BitProcess()); } } @@ -132,6 +131,15 @@ struct KernelCore::Impl { next_user_process_id = KProcess::ProcessIDMin; next_thread_id = 1; + for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { + if (suspend_threads[core_id]) { + suspend_threads[core_id]->Close(); + suspend_threads[core_id] = nullptr; + } + + schedulers[core_id].reset(); + } + cores.clear(); global_handle_table->Finalize(); @@ -159,16 +167,6 @@ struct KernelCore::Impl { CleanupObject(time_shared_mem); CleanupObject(system_resource_limit); - for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { - if (suspend_threads[core_id]) { - suspend_threads[core_id]->Close(); - suspend_threads[core_id] = nullptr; - } - - schedulers[core_id]->Finalize(); - schedulers[core_id].reset(); - } - // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others next_host_thread_id = Core::Hardware::NUM_CPU_CORES; @@ -259,6 +257,14 @@ struct KernelCore::Impl { void MakeCurrentProcess(KProcess* process) { current_process = process; + if (process == nullptr) { + return; + } + + const u32 core_id = GetCurrentHostThreadID(); + if (core_id < Core::Hardware::NUM_CPU_CORES) { + system.Memory().SetCurrentPageTable(*process, core_id); + } } static inline thread_local u32 host_thread_id = UINT32_MAX; @@ -821,20 +827,16 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { return impl->cores[id]; } -size_t KernelCore::CurrentPhysicalCoreIndex() const { - const u32 core_id = impl->GetCurrentHostThreadID(); - if (core_id >= Core::Hardware::NUM_CPU_CORES) { - return Core::Hardware::NUM_CPU_CORES - 1; - } - return core_id; -} - Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() { - return impl->cores[CurrentPhysicalCoreIndex()]; + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; } const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { - return impl->cores[CurrentPhysicalCoreIndex()]; + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; } Kernel::KScheduler* KernelCore::CurrentScheduler() { @@ -1027,9 +1029,6 @@ void KernelCore::Suspend(bool in_suspention) { impl->suspend_threads[core_id]->SetState(state); impl->suspend_threads[core_id]->SetWaitReasonForDebugging( 
ThreadWaitReasonForDebugging::Suspended); - if (!should_suspend) { - impl->suspend_threads[core_id]->DisableDispatch(); - } } } } @@ -1044,11 +1043,13 @@ void KernelCore::ExceptionalExit() { } void KernelCore::EnterSVCProfile() { - impl->svc_ticks[CurrentPhysicalCoreIndex()] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC)); + std::size_t core = impl->GetCurrentHostThreadID(); + impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC)); } void KernelCore::ExitSVCProfile() { - MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[CurrentPhysicalCoreIndex()]); + std::size_t core = impl->GetCurrentHostThreadID(); + MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); } std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) { diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 57535433b..3a6db0b1c 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -146,9 +146,6 @@ public: /// Gets the an instance of the respective physical CPU core. const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; - /// Gets the current physical core index for the running host thread. - std::size_t CurrentPhysicalCoreIndex() const; - /// Gets the sole instance of the Scheduler at the current running core. Kernel::KScheduler* CurrentScheduler(); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 890c52198..62fb06c45 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -877,7 +877,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, Handle const u64 thread_ticks = current_thread->GetCpuTime(); out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); - } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) { + } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; } diff --git a/src/core/hle/service/ngct/ngct.cpp b/src/core/hle/service/ngct/ngct.cpp new file mode 100644 index 000000000..deb3abb28 --- /dev/null +++ b/src/core/hle/service/ngct/ngct.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included + +#include "common/string_util.h" +#include "core/core.h" +#include "core/hle/ipc_helpers.h" +#include "core/hle/service/ngct/ngct.h" +#include "core/hle/service/service.h" + +namespace Service::NGCT { + +class IService final : public ServiceFramework<IService> { +public: + explicit IService(Core::System& system_) : ServiceFramework{system_, "ngct:u"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "Match"}, + {1, &IService::Filter, "Filter"}, + }; + // clang-format on + + RegisterHandlers(functions); + } + +private: + void Filter(Kernel::HLERequestContext& ctx) { + const auto buffer = ctx.ReadBuffer(); + const auto text = Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>(buffer.data()), buffer.size()); + + LOG_WARNING(Service_NGCT, "(STUBBED) called, text={}", text); + + // Return the same string since we don't censor anything + ctx.WriteBuffer(buffer); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } +}; + +void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { + std::make_shared<IService>(system)->InstallAsService(system.ServiceManager()); +} + +} // namespace Service::NGCT diff 
--git a/src/core/hle/service/ngct/ngct.h b/src/core/hle/service/ngct/ngct.h new file mode 100644 index 000000000..1f2a47b78 --- /dev/null +++ b/src/core/hle/service/ngct/ngct.h @@ -0,0 +1,20 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included + +#pragma once + +namespace Core { +class System; +} + +namespace Service::SM { +class ServiceManager; +} + +namespace Service::NGCT { + +/// Registers all NGCT services with the specified service manager. +void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system); + +} // namespace Service::NGCT diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 0a53c0c81..9decb9290 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -277,37 +277,45 @@ private: void GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_NIFM, "(STUBBED) called"); - const SfNetworkProfileData network_profile_data{ - .ip_setting_data{ - .ip_address_setting{ - .is_automatic{true}, - .current_address{192, 168, 1, 100}, - .subnet_mask{255, 255, 255, 0}, - .gateway{192, 168, 1, 1}, - }, - .dns_setting{ - .is_automatic{true}, - .primary_dns{1, 1, 1, 1}, - .secondary_dns{1, 0, 0, 1}, + const auto net_iface = Network::GetSelectedNetworkInterface(); + + const SfNetworkProfileData network_profile_data = [&net_iface] { + if (!net_iface) { + return SfNetworkProfileData{}; + } + + return SfNetworkProfileData{ + .ip_setting_data{ + .ip_address_setting{ + .is_automatic{true}, + .current_address{Network::TranslateIPv4(net_iface->ip_address)}, + .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)}, + .gateway{Network::TranslateIPv4(net_iface->gateway)}, + }, + .dns_setting{ + .is_automatic{true}, + .primary_dns{1, 1, 1, 1}, + .secondary_dns{1, 0, 0, 1}, + }, + .proxy_setting{ + .enabled{false}, + .port{}, + .proxy_server{}, + .automatic_auth_enabled{}, + .user{}, + .password{}, + }, + .mtu{1500}, }, - .proxy_setting{ - .enabled{false}, - .port{}, - .proxy_server{}, - .automatic_auth_enabled{}, - .user{}, - .password{}, + .uuid{0xdeadbeef, 0xdeadbeef}, + .network_name{"yuzu Network"}, + .wireless_setting_data{ + .ssid_length{12}, + .ssid{"yuzu Network"}, + .passphrase{"yuzupassword"}, }, - .mtu{1500}, - }, - .uuid{0xdeadbeef, 0xdeadbeef}, - .network_name{"yuzu Network"}, - .wireless_setting_data{ - .ssid_length{12}, - .ssid{"yuzu Network"}, - .passphrase{"yuzupassword"}, - }, - }; + }; + }(); ctx.WriteBuffer(network_profile_data); @@ -352,38 +360,33 @@ private: LOG_WARNING(Service_NIFM, "(STUBBED) called"); struct IpConfigInfo { - IpAddressSetting ip_address_setting; - DnsSetting dns_setting; + IpAddressSetting ip_address_setting{}; + DnsSetting dns_setting{}; }; static_assert(sizeof(IpConfigInfo) == sizeof(IpAddressSetting) + sizeof(DnsSetting), "IpConfigInfo has incorrect size."); - IpConfigInfo ip_config_info{ - .ip_address_setting{ - .is_automatic{true}, - .current_address{0, 0, 0, 0}, - .subnet_mask{255, 255, 255, 0}, - .gateway{192, 168, 1, 1}, - }, - .dns_setting{ - .is_automatic{true}, - .primary_dns{1, 1, 1, 1}, - .secondary_dns{1, 0, 0, 1}, - }, - }; + const auto net_iface = Network::GetSelectedNetworkInterface(); - const auto iface = Network::GetSelectedNetworkInterface(); - if (iface) { - ip_config_info.ip_address_setting = - IpAddressSetting{.is_automatic{true}, - .current_address{Network::TranslateIPv4(iface->ip_address)}, - 
.subnet_mask{Network::TranslateIPv4(iface->subnet_mask)}, - .gateway{Network::TranslateIPv4(iface->gateway)}}; + const IpConfigInfo ip_config_info = [&net_iface] { + if (!net_iface) { + return IpConfigInfo{}; + } - } else { - LOG_ERROR(Service_NIFM, - "Couldn't get host network configuration info, using default values"); - } + return IpConfigInfo{ + .ip_address_setting{ + .is_automatic{true}, + .current_address{Network::TranslateIPv4(net_iface->ip_address)}, + .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)}, + .gateway{Network::TranslateIPv4(net_iface->gateway)}, + }, + .dns_setting{ + .is_automatic{true}, + .primary_dns{1, 1, 1, 1}, + .secondary_dns{1, 0, 0, 1}, + }, + }; + }(); IPC::ResponseBuilder rb{ctx, 2 + (sizeof(IpConfigInfo) + 3) / sizeof(u32)}; rb.Push(ResultSuccess); diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index ce6065db2..a33e47d0b 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, const Common::Rectangle<int>& crop_rect) { - VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); + const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", addr, offset, width, height, stride, format); - using PixelFormat = Tegra::FramebufferConfig::PixelFormat; - const Tegra::FramebufferConfig framebuffer{ - addr, offset, width, height, stride, static_cast<PixelFormat>(format), - transform, crop_rect}; + const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); + const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, + stride, pixel_format, transform, crop_rect}; system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 759247eb0..78de3f354 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -42,7 +42,9 @@ struct IGBPBuffer { u32_le index; INSERT_PADDING_WORDS(3); u32_le gpu_buffer_id; - INSERT_PADDING_WORDS(17); + INSERT_PADDING_WORDS(6); + u32_le external_format; + INSERT_PADDING_WORDS(10); u32_le nvmap_handle; u32_le offset; INSERT_PADDING_WORDS(60); diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 00bff8caf..3ead813b0 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -298,7 +298,7 @@ void NVFlinger::Compose() { auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0"); ASSERT(nvdisp); - nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format, + nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format, igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->get().transform, buffer->get().crop_rect); diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index b3e50433b..065133166 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -46,6 +46,7 @@ #include "core/hle/service/ncm/ncm.h" #include 
"core/hle/service/nfc/nfc.h" #include "core/hle/service/nfp/nfp.h" +#include "core/hle/service/ngct/ngct.h" #include "core/hle/service/nifm/nifm.h" #include "core/hle/service/nim/nim.h" #include "core/hle/service/npns/npns.h" @@ -271,6 +272,7 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system NCM::InstallInterfaces(*sm, system); NFC::InstallInterfaces(*sm, system); NFP::InstallInterfaces(*sm, system); + NGCT::InstallInterfaces(*sm, system); NIFM::InstallInterfaces(*sm, system); NIM::InstallInterfaces(*sm, system); NPNS::InstallInterfaces(*sm, system); diff --git a/src/core/network/network_interface.cpp b/src/core/network/network_interface.cpp index cecc9aa11..6811f21b1 100644 --- a/src/core/network/network_interface.cpp +++ b/src/core/network/network_interface.cpp @@ -37,73 +37,73 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { AF_INET, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_INCLUDE_GATEWAYS, nullptr, adapter_addresses.data(), &buf_size); - if (ret == ERROR_BUFFER_OVERFLOW) { - adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1); - } else { + if (ret != ERROR_BUFFER_OVERFLOW) { break; } + + adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1); } - if (ret == NO_ERROR) { - std::vector<NetworkInterface> result; + if (ret != NO_ERROR) { + LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses"); + return {}; + } - for (auto current_address = adapter_addresses.data(); current_address != nullptr; - current_address = current_address->Next) { - if (current_address->FirstUnicastAddress == nullptr || - current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) { - continue; - } + std::vector<NetworkInterface> result; - if (current_address->OperStatus != IfOperStatusUp) { - continue; - } + for (auto current_address = adapter_addresses.data(); current_address != nullptr; + current_address = current_address->Next) { + if (current_address->FirstUnicastAddress == nullptr || + current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) { + continue; + } - const auto ip_addr = Common::BitCast<struct sockaddr_in>( - *current_address->FirstUnicastAddress->Address.lpSockaddr) - .sin_addr; + if (current_address->OperStatus != IfOperStatusUp) { + continue; + } - ULONG mask = 0; - if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength, - &mask) != NO_ERROR) { - LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask"); - continue; - } + const auto ip_addr = Common::BitCast<struct sockaddr_in>( + *current_address->FirstUnicastAddress->Address.lpSockaddr) + .sin_addr; - struct in_addr gateway = {.S_un{.S_addr{0}}}; - if (current_address->FirstGatewayAddress != nullptr && - current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) { - gateway = Common::BitCast<struct sockaddr_in>( - *current_address->FirstGatewayAddress->Address.lpSockaddr) - .sin_addr; - } + ULONG mask = 0; + if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength, + &mask) != NO_ERROR) { + LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask"); + continue; + } - result.push_back(NetworkInterface{ - .name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})}, - .ip_address{ip_addr}, - .subnet_mask = in_addr{.S_un{.S_addr{mask}}}, - .gateway = gateway}); + struct in_addr gateway = {.S_un{.S_addr{0}}}; + if (current_address->FirstGatewayAddress != nullptr && + 
current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) { + gateway = Common::BitCast<struct sockaddr_in>( + *current_address->FirstGatewayAddress->Address.lpSockaddr) + .sin_addr; } - return result; - } else { - LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses"); - return {}; + result.emplace_back(NetworkInterface{ + .name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})}, + .ip_address{ip_addr}, + .subnet_mask = in_addr{.S_un{.S_addr{mask}}}, + .gateway = gateway}); } + + return result; } #else std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { - std::vector<NetworkInterface> result; - struct ifaddrs* ifaddr = nullptr; if (getifaddrs(&ifaddr) != 0) { LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}", std::strerror(errno)); - return result; + return {}; } + std::vector<NetworkInterface> result; + for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) { if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) { continue; @@ -117,55 +117,62 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { continue; } - std::uint32_t gateway{0}; + u32 gateway{}; + std::ifstream file{"/proc/net/route"}; - if (file.is_open()) { + if (!file.is_open()) { + LOG_ERROR(Network, "Failed to open \"/proc/net/route\""); - // ignore header - file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); + result.emplace_back(NetworkInterface{ + .name{ifa->ifa_name}, + .ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr}, + .subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr}, + .gateway{in_addr{.s_addr = gateway}}}); + continue; + } - bool gateway_found = false; + // ignore header + file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); - for (std::string line; std::getline(file, line);) { - std::istringstream iss{line}; + bool gateway_found = false; - std::string iface_name{}; - iss >> iface_name; - if (iface_name != ifa->ifa_name) { - continue; - } + for (std::string line; std::getline(file, line);) { + std::istringstream iss{line}; - iss >> std::hex; + std::string iface_name; + iss >> iface_name; + if (iface_name != ifa->ifa_name) { + continue; + } - std::uint32_t dest{0}; - iss >> dest; - if (dest != 0) { - // not the default route - continue; - } + iss >> std::hex; - iss >> gateway; + u32 dest{}; + iss >> dest; + if (dest != 0) { + // not the default route + continue; + } - std::uint16_t flags{0}; - iss >> flags; + iss >> gateway; - // flag RTF_GATEWAY (defined in <linux/route.h>) - if ((flags & 0x2) == 0) { - continue; - } + u16 flags{}; + iss >> flags; - gateway_found = true; - break; + // flag RTF_GATEWAY (defined in <linux/route.h>) + if ((flags & 0x2) == 0) { + continue; } - if (!gateway_found) { - gateway = 0; - } - } else { - LOG_ERROR(Network, "Failed to open \"/proc/net/route\""); + gateway_found = true; + break; } - result.push_back(NetworkInterface{ + if (!gateway_found) { + gateway = 0; + } + + result.emplace_back(NetworkInterface{ .name{ifa->ifa_name}, .ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr}, .subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr}, @@ -180,11 +187,11 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { #endif std::optional<NetworkInterface> GetSelectedNetworkInterface() { - const std::string& selected_network_interface = Settings::values.network_interface.GetValue(); + const auto& selected_network_interface = 
Settings::values.network_interface.GetValue(); const auto network_interfaces = Network::GetAvailableNetworkInterfaces(); if (network_interfaces.size() == 0) { LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces"); - return {}; + return std::nullopt; } const auto res = @@ -192,12 +199,12 @@ std::optional<NetworkInterface> GetSelectedNetworkInterface() { return iface.name == selected_network_interface; }); - if (res != network_interfaces.end()) { - return *res; - } else { + if (res == network_interfaces.end()) { LOG_ERROR(Network, "Couldn't find selected interface \"{}\"", selected_network_interface); - return {}; + return std::nullopt; } + + return *res; } } // namespace Network diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,8 @@ namespace Shader::Backend::SPIRV { namespace { +constexpr size_t NUM_FIXEDFNCTEXTURE = 10; + enum class Operation { Increment, Decrement, @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { return pointer_type; } } + +size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, + size_t start_offset) { + for (size_t location = start_offset; location < used_locations.size(); ++location) { + if (!used_locations.test(location)) { + return location; + } + } + throw RuntimeError("Unable to get an unused location for legacy attribute"); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { const AttributeType input_type{runtime_info.generic_input_types[index]}; if (!runtime_info.previous_stage_stores.Generic(index)) { @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { if (input_type == AttributeType::Disabled) { continue; } + used_locations.set(index); const Id type{GetAttributeType(*this, input_type)}; const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { break; } } + size_t previous_unused_location = 0; + if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_fixed_fnc_textures[index] = id; + } + } if (stage == Stage::TessellationEval) { for (size_t index = 0; index < info.uses_patches.size(); 
++index) { if (!info.uses_patches[index]) { @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); + used_locations.set(index); + } + } + size_t previous_unused_location = 0; + if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); + output_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, location); + output_fixed_fnc_textures[index] = id; } } switch (stage) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -268,10 +268,14 @@ public: Id write_global_func_u32x4{}; Id input_position{}; + Id input_front_color{}; + std::array<Id, 10> input_fixed_fnc_textures{}; std::array<Id, 32> input_generics{}; Id output_point_size{}; Id output_position{}; + Id output_front_color{}; + std::array<Id, 10> output_fixed_fnc_textures{}; std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; Id output_tess_level_outer{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 14c77f162..68f360b3c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } } +bool IsFixedFncTexture(IR::Attribute attribute) { + return attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q; +} + +u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; +} + +u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return static_cast<u32>(attribute) % 4u; +} + template <typename... Args> Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... 
args) { if (ctx.stage == Stage::TessellationControl) { @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const u32 element{FixedFncTextureAttributeElement(attr)}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], + element_id); + } switch (attr) { case IR::Attribute::PointSize: return ctx.output_point_size; @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); + } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const Id attr_id{ctx.input_fixed_fnc_textures[index]}; + const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; + return ctx.OpLoad(ctx.F32[1], attr_ptr); + } switch (attr) { case IR::Attribute::PrimitiveId: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); @@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionW: return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, + ctx.Const(element))); + } case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -333,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } case IR::Attribute::FrontFace: - return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), - ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), + ctx.f32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -20,6 
+20,7 @@ #include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { @@ -652,7 +653,7 @@ class TranslatePass { public: TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::AbstractSyntaxList& syntax_list_) + IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); @@ -660,6 +661,9 @@ public: IR::Block& first_block{*syntax_list.front().data.block}; IR::IREmitter ir(first_block, first_block.begin()); ir.Prologue(); + if (uses_demote_to_helper && host_info.needs_demote_reorder) { + DemoteCombinationPass(); + } } private: @@ -809,7 +813,14 @@ private: } case StatementType::Return: { ensure_block(); - IR::IREmitter{*current_block}.Epilogue(); + IR::Block* return_block{block_pool.Create(inst_pool)}; + IR::IREmitter{*return_block}.Epilogue(); + current_block->AddBranch(return_block); + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = return_block; + current_block = nullptr; syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; @@ -824,6 +835,7 @@ private: auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; merge.data.block = demote_block; + uses_demote_to_helper = true; break; } case StatementType::Unreachable: { @@ -855,11 +867,117 @@ private: return block_pool.Create(inst_pool); } + void DemoteCombinationPass() { + using Type = IR::AbstractSyntaxNode::Type; + std::vector<IR::Block*> demote_blocks; + std::vector<IR::U1> demote_conds; + u32 num_epilogues{}; + u32 branch_depth{}; + for (const IR::AbstractSyntaxNode& node : syntax_list) { + if (node.type == Type::If) { + ++branch_depth; + } + if (node.type == Type::EndIf) { + --branch_depth; + } + if (node.type != Type::Block) { + continue; + } + if (branch_depth > 1) { + // Skip reordering nested demote branches. + continue; + } + for (const IR::Inst& inst : node.data.block->Instructions()) { + const IR::Opcode op{inst.GetOpcode()}; + if (op == IR::Opcode::DemoteToHelperInvocation) { + demote_blocks.push_back(node.data.block); + break; + } + if (op == IR::Opcode::Epilogue) { + ++num_epilogues; + } + } + } + if (demote_blocks.size() == 0) { + return; + } + if (num_epilogues > 1) { + LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); + return; + } + s64 last_iterator_offset{}; + auto& asl{syntax_list}; + for (const IR::Block* demote_block : demote_blocks) { + const auto start_it{asl.begin() + last_iterator_offset}; + auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::If && asn.data.if_node.body == demote_block; + })}; + if (asl_it == asl.end()) { + // Demote without a conditional branch. + // No need to proceed since all fragment instances will be demoted regardless. 
+ return; + } + const IR::Block* const end_if = asl_it->data.if_node.merge; + demote_conds.push_back(asl_it->data.if_node.cond); + last_iterator_offset = std::distance(asl.begin(), asl_it); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::Block && asn.data.block == demote_block; + }); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; + }); + asl_it = asl.erase(asl_it); + } + const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { + if (asn.type != Type::Block) { + return false; + } + for (const auto& inst : asn.data.block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return true; + } + } + return false; + }}; + const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; + const auto return_block_it{(reverse_it + 1).base()}; + + IR::IREmitter ir{*(return_block_it - 1)->data.block}; + IR::U1 cond(IR::Value(false)); + for (const auto& demote_cond : demote_conds) { + cond = ir.LogicalOr(cond, demote_cond); + } + cond.Inst()->DestructiveAddUsage(1); + + IR::AbstractSyntaxNode demote_if_node{}; + demote_if_node.type = Type::If; + demote_if_node.data.if_node.cond = cond; + demote_if_node.data.if_node.body = demote_blocks[0]; + demote_if_node.data.if_node.merge = return_block_it->data.block; + + IR::AbstractSyntaxNode demote_node{}; + demote_node.type = Type::Block; + demote_node.data.block = demote_blocks[0]; + + IR::AbstractSyntaxNode demote_endif_node{}; + demote_endif_node.type = Type::EndIf; + demote_endif_node.data.end_if.merge = return_block_it->data.block; + + asl.insert(return_block_it, demote_endif_node); + asl.insert(return_block_it, demote_node); + asl.insert(return_block_it, demote_if_node); + } + ObjectPool<Statement>& stmt_pool; ObjectPool<IR::Inst>& inst_pool; ObjectPool<IR::Block>& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; + bool uses_demote_to_helper{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -871,12 +989,13 @@ private: } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, + const HostTranslateInfo& host_info) { ObjectPool<Statement> stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; return syntax_list; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,10 +11,13 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" -namespace Shader::Maxwell { +namespace Shader { +struct HostTranslateInfo; +namespace Maxwell { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const 
HostTranslateInfo& host_info); -} // namespace Shader::Maxwell +} // namespace Maxwell +} // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -11,8 +11,9 @@ namespace Shader { /// Misc information about the host struct HostTranslateInfo { - bool support_float16{}; ///< True when the device supports 16-bit floats - bool support_int64{}; ///< True when the device supports 64-bit integers + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers + bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c..be2113f5a 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -261,16 +261,6 @@ public: stream_score += score; } - /// Sets the new frame tick - void SetFrameTick(u64 new_frame_tick) noexcept { - frame_tick = new_frame_tick; - } - - /// Returns the new frame tick - [[nodiscard]] u64 FrameTick() const noexcept { - return frame_tick; - } - /// Returns the likeliness of this being a stream buffer [[nodiscard]] int StreamScore() const noexcept { return stream_score; @@ -307,6 +297,14 @@ public: return words.size_bytes; } + size_t getLRUID() const noexcept { + return lru_id; + } + + void setLRUID(size_t lru_id_) { + lru_id = lru_id_; + } + private: template <Type type> u64* Array() noexcept { @@ -603,9 +601,9 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; Words words; - u64 frame_tick = 0; BufferFlagBits flags{}; int stream_score = 0; + size_t lru_id = SIZE_MAX; }; } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9..7bfd57369 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -330,7 +331,7 @@ private: template <bool insert> void ChangeRegister(BufferId buffer_id); - void TouchBuffer(Buffer& buffer) const noexcept; + void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; bool SynchronizeBuffer(Buffer& buffer, VAddr 
cpu_addr, u32 size); @@ -428,7 +429,11 @@ private: size_t immediate_buffer_capacity = 0; std::unique_ptr<u8[]> immediate_buffer_alloc; - typename SlotVector<Buffer>::Iterator deletion_iterator; + struct LRUItemParams { + using ObjectType = BufferId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; @@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); - deletion_iterator = slot_buffers.end(); common_ranges.clear(); } @@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_buffers.end()) { - deletion_iterator = slot_buffers.begin(); - } - ++deletion_iterator; - if (deletion_iterator == slot_buffers.end()) { - break; - } - const auto [buffer_id, buffer] = *deletion_iterator; - if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { - DownloadBufferMemory(*buffer); - DeleteBuffer(buffer_id); + const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + if (num_iterations == 0) { + return true; } - } + --num_iterations; + auto& buffer = slot_buffers[buffer_id]; + DownloadBufferMemory(buffer); + DeleteBuffer(buffer_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> @@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? 
DEFAULT_SKIP_CACHE_SIZE : 0; - if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { + if (total_used_memory >= EXPECTED_MEMORY) { RunGarbageCollector(); } ++frame_tick; @@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { template <class P> void BufferCache<P>::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, index_buffer.buffer_id); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { const Binding& binding = vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size); @@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { const Binding& binding = storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { const Binding& binding = compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); const u32 size = static_cast<u32>(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); - 
TouchBuffer(slot_buffers[new_buffer_id]); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } Register(new_buffer_id); + TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); return new_buffer_id; } @@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) { template <class P> template <bool insert> void BufferCache<P>::ChangeRegister(BufferId buffer_id) { - const Buffer& buffer = slot_buffers[buffer_id]; + Buffer& buffer = slot_buffers[buffer_id]; const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); + buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); } else { total_used_memory -= Common::AlignUp(size, 1024); + lru_cache.Free(buffer.getLRUID()); } const VAddr cpu_addr_begin = buffer.CpuAddr(); const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { } template <class P> -void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { - buffer.SetFrameTick(frame_tick); +void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { + if (buffer_id != NULL_BUFFER_ID) { + lru_cache.Touch(buffer.getLRUID(), frame_tick); + } } template <class P> diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 70030066a..d7e749485 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); + ASSERT(!current_frame_info.segment_enabled); uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). 
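
The buffer-cache hunks above replace the old per-buffer frame_tick bookkeeping with the shared Common::LeastRecentlyUsedCache introduced by this patch: a buffer is inserted on registration (ChangeRegister stores the slot id via setLRUID), touched on every bind (TouchBuffer), and RunGarbageCollector walks the least recently used entries. The standalone sketch below is not part of the diff; it only illustrates the intended call pattern, and the traits struct, object/tick values, and include paths are assumptions for the example.

```cpp
// Illustrative only: drives Common::LeastRecentlyUsedCache the same way the
// buffer/texture caches in this patch do. Types and values are assumptions.
#include <cstddef>
#include <cstdint>

#include "common/lru_cache.h"

struct ExampleItemParams {
    using ObjectType = std::uint32_t; // stand-in for a BufferId/ImageId-style handle
    using TickType = std::uint64_t;   // frame counter
};

void ExampleGarbageCollection() {
    Common::LeastRecentlyUsedCache<ExampleItemParams> lru;
    std::uint64_t frame_tick = 0;

    // Register: keep the returned slot id (the patch stores it in the
    // buffer/image via setLRUID()/lru_index).
    const std::size_t lru_id = lru.Insert(/*object=*/42u, frame_tick);

    // Touch on every use so the object moves to the most-recently-used end.
    ++frame_tick;
    lru.Touch(lru_id, frame_tick);

    // Garbage collection: visit objects whose last tick is older than the
    // threshold, least recently used first; the callback returns true to
    // request an early stop (as the clean_up lambdas above do when the
    // iteration budget runs out).
    const std::uint64_t ticks_to_destroy = 120;
    const std::uint64_t threshold =
        frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0;
    lru.ForEachItemBelow(threshold, [&](std::uint32_t object) {
        (void)object; // ...download/delete the object here...
        return false; // keep iterating
    });

    // Unregister when the object is destroyed.
    lru.Free(lru_id);
}
```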
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 87eafdb03..3b1ed4b3a 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -22,7 +22,7 @@ struct Vp9FrameDimensions { }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); -enum FrameFlags : u32 { +enum class FrameFlags : u32 { IsKeyFrame = 1 << 0, LastFrameIsKeyFrame = 1 << 1, FrameSizeChanged = 1 << 2, @@ -30,6 +30,7 @@ enum FrameFlags : u32 { LastShowFrame = 1 << 4, IntraOnly = 1 << 5, }; +DECLARE_ENUM_FLAG_OPERATORS(FrameFlags) enum class TxSize { Tx4x4 = 0, // 4x4 transform @@ -92,44 +93,34 @@ struct Vp9EntropyProbs { static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame; - bool intra_only; - bool last_frame_was_key; - bool frame_size_changed; - bool error_resilient_mode; - bool last_frame_shown; - bool show_frame; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; std::array<s8, 4> ref_frame_sign_bias; s32 base_q_index; s32 y_dc_delta_q; s32 uv_dc_delta_q; s32 uv_ac_delta_q; - bool lossless; s32 transform_mode; - bool allow_high_precision_mv; s32 interp_filter; s32 reference_mode; - s8 comp_fixed_ref; - std::array<s8, 2> comp_var_ref; s32 log2_tile_cols; s32 log2_tile_rows; - bool segment_enabled; - bool segment_map_update; - bool segment_map_temporal_update; - s32 segment_abs_delta; - std::array<u32, 8> segment_feature_enable; - std::array<std::array<s16, 4>, 8> segment_feature_data; - bool mode_ref_delta_enabled; - bool use_prev_in_find_mv_refs; std::array<s8, 4> ref_deltas; std::array<s8, 2> mode_deltas; Vp9EntropyProbs entropy; Vp9FrameDimensions frame_size; u8 first_level; u8 sharpness_level; - u32 bitstream_size; - std::array<u64, 4> frame_offsets; - std::array<bool, 4> refresh_frame; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + bool lossless; + bool allow_high_precision_mv; + bool segment_enabled; + bool mode_ref_delta_enabled; }; struct Vp9FrameContainer { @@ -145,7 +136,7 @@ struct PictureInfo { Vp9FrameDimensions golden_frame_size; ///< 0x50 Vp9FrameDimensions alt_frame_size; ///< 0x58 Vp9FrameDimensions current_frame_size; ///< 0x60 - u32 vp9_flags; ///< 0x68 + FrameFlags vp9_flags; ///< 0x68 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C u8 first_level; ///< 0x70 u8 sharpness_level; ///< 0x71 @@ -158,60 +149,43 @@ struct PictureInfo { u8 allow_high_precision_mv; ///< 0x78 u8 interp_filter; ///< 0x79 u8 reference_mode; ///< 0x7A - s8 comp_fixed_ref; ///< 0x7B - std::array<s8, 2> comp_var_ref; ///< 0x7C + INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B u8 log2_tile_cols; ///< 0x7E u8 log2_tile_rows; ///< 0x7F Segmentation segmentation; ///< 0x80 LoopFilter loop_filter; ///< 0xE4 - INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB - u32 surface_params; ///< 0xF0 - INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 + INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB [[nodiscard]] Vp9PictureInfo Convert() const { return { - .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, - .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, - .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, - .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, - .error_resilient_mode = 
(vp9_flags & FrameFlags::ErrorResilientMode) != 0, - .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, - .show_frame = true, + .bitstream_size = bitstream_size, + .frame_offsets{}, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, .uv_dc_delta_q = uv_dc_delta_q, .uv_ac_delta_q = uv_ac_delta_q, - .lossless = lossless != 0, .transform_mode = tx_mode, - .allow_high_precision_mv = allow_high_precision_mv != 0, .interp_filter = interp_filter, .reference_mode = reference_mode, - .comp_fixed_ref = comp_fixed_ref, - .comp_var_ref = comp_var_ref, .log2_tile_cols = log2_tile_cols, .log2_tile_rows = log2_tile_rows, - .segment_enabled = segmentation.enabled != 0, - .segment_map_update = segmentation.update_map != 0, - .segment_map_temporal_update = segmentation.temporal_update != 0, - .segment_abs_delta = segmentation.abs_delta, - .segment_feature_enable = segmentation.feature_mask, - .segment_feature_data = segmentation.feature_data, - .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, - .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && - !(vp9_flags == (FrameFlags::FrameSizeChanged)) && - !(vp9_flags == (FrameFlags::IntraOnly)) && - (vp9_flags == (FrameFlags::LastShowFrame)) && - !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, - .bitstream_size = bitstream_size, - .frame_offsets{}, - .refresh_frame{}, + .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame), + .intra_only = True(vp9_flags & FrameFlags::IntraOnly), + .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame), + .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode), + .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame), + .show_frame = true, + .lossless = lossless != 0, + .allow_high_precision_mv = allow_high_precision_mv != 0, + .segment_enabled = segmentation.enabled != 0, + .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, }; } }; @@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48); ASSERT_POSITION(first_level, 0x70); ASSERT_POSITION(segmentation, 0x80); ASSERT_POSITION(loop_filter, 0xE4); -ASSERT_POSITION(surface_params, 0xF0); #undef ASSERT_POSITION #define ASSERT_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1aa43523a..7f4ca6282 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -475,10 +475,10 @@ public: // These values are used by Nouveau and some games. 
AddGL = 0x8006, - SubtractGL = 0x8007, - ReverseSubtractGL = 0x8008, - MinGL = 0x800a, - MaxGL = 0x800b + MinGL = 0x8007, + MaxGL = 0x8008, + SubtractGL = 0x800a, + ReverseSubtractGL = 0x800b }; enum class Factor : u32 { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee992aed4..de9e41659 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -156,6 +156,10 @@ public: return shader_backend; } + bool IsAmd() const { + return vendor_name == "ATI Technologies Inc."; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1f4dda17e..b0e14182e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo host_info{ .support_float16 = false, .support_int64 = device.HasShaderInt64(), + .needs_demote_reorder = device.IsAmd(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c9b0d6db..9ff0a28cd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { blit_screen.Recreate(); } const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); - scheduler.Flush(render_semaphore); + const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); + scheduler.Flush(render_semaphore, present_semaphore); scheduler.WaitWorker(); swapchain.Present(render_semaphore); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 3a78c9daa..888bc7392 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); - const size_t size_bytes = GetSizeInBytes(framebuffer); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel, + framebuffer.stride, framebuffer.height, + 1, block_height_log2, 0)}; Tegra::Texture::UnswizzleTexture( mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f316c4f92..31bfbcb06 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw host_info = Shader::HostTranslateInfo{ .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), + .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || + 
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, }; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 4840962de..1d438787a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(VkSemaphore semaphore) { - SubmitExecution(semaphore); +void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + SubmitExecution(signal_semaphore, wait_semaphore); AllocateNewContext(); } -void VKScheduler::Finish(VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { const u64 presubmit_tick = CurrentTick(); - SubmitExecution(semaphore); + SubmitExecution(signal_semaphore, wait_semaphore); WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); @@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() { }); } -void VKScheduler::SubmitExecution(VkSemaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { EndPendingOperations(); InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); - Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); - - const u32 num_signal_semaphores = semaphore ? 2U : 1U; - - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + + const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + }; const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), .signalSemaphoreValueCount = num_signal_semaphores, .pSignalSemaphoreValues = signal_values.data(), }; const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1, .pCommandBuffers = cmdbuf.address(), .signalSemaphoreCount = num_signal_semaphores, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cf39a2363..759ed5a48 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -34,10 +34,10 @@ public: ~VKScheduler(); /// Sends the current execution context to the GPU. 
- void Flush(VkSemaphore semaphore = nullptr); + void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(VkSemaphore semaphore = nullptr); + void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -191,7 +191,7 @@ private: void AllocateWorkerCommandBuffer(); - void SubmitExecution(VkSemaphore semaphore); + void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index e5318e52d..aadf03cb0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -106,14 +106,12 @@ void VKSwapchain::AcquireNextImage() { } void VKSwapchain::Present(VkSemaphore render_semaphore) { - const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; - const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreCount = render_semaphore ? 2U : 1U, - .pWaitSemaphores = semaphores.data(), + .waitSemaphoreCount = render_semaphore ? 1U : 0U, + .pWaitSemaphores = &render_semaphore, .swapchainCount = 1, .pSwapchains = swapchain.address(), .pImageIndices = &image_index, diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index cd472dd0a..5bce41e21 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -72,6 +72,10 @@ public: return image_view_format; } + VkSemaphore CurrentPresentSemaphore() const { + return *present_semaphores[frame_index]; + } + private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..0c17a791b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - u64 frame_tick = 0; + size_t lru_index = SIZE_MAX; std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..24b809242 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,7 +5,6 @@ #pragma once #include "common/alignment.h" -#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); - deletion_iterator = slot_images.begin(); - if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 3) / 10; @@ -64,70 +61,38 @@ template <class P> void 
TextureCache<P>::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; + size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); + const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + if (num_iterations == 0) { + return true; } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } + --num_iterations; + auto& image = slot_images[image_id]; + const bool must_download = image.IsSafeDownload(); + if (!high_priority_mode && must_download) { + return false; } - ++deletion_iterator; - } + if (must_download) { + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> void TextureCache<P>::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + if (total_used_memory > minimum_memory) { RunGarbageCollector(); } 
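
Stepping back to the vk_scheduler.cpp change a few hunks above: each submission now waits on the swapchain's image-acquire (binary) semaphore in addition to the scheduler's timeline semaphore, instead of leaving that wait to vkQueuePresentKHR. The sketch below is not part of the diff; it shows the resulting submit shape in isolation. The function name and parameters are illustrative, and it assumes headers providing VK_KHR_timeline_semaphore (or Vulkan 1.2). The key detail is that the wait/signal values paired with binary semaphores are placeholders the driver ignores; only the timeline semaphore's slots carry meaning.

```cpp
// Illustrative only: one queue submission mixing a timeline semaphore with
// binary semaphores on both the wait and signal sides.
#include <array>
#include <cstdint>
#include <vulkan/vulkan.h>

void SubmitFrame(VkQueue queue, VkCommandBuffer cmdbuf, VkSemaphore timeline_semaphore,
                 VkSemaphore acquire_semaphore, VkSemaphore render_semaphore,
                 std::uint64_t signal_value) {
    // One entry per wait semaphore; the value next to the binary semaphore is ignored.
    const std::array<std::uint64_t, 2> wait_values{signal_value - 1, 1};
    const std::array<VkSemaphore, 2> wait_semaphores{timeline_semaphore, acquire_semaphore};
    static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
    };
    // One entry per signal semaphore; again the binary semaphore's value is ignored.
    const std::array<std::uint64_t, 2> signal_values{signal_value, 0};
    const std::array<VkSemaphore, 2> signal_semaphores{timeline_semaphore, render_semaphore};

    const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
        .pNext = nullptr,
        .waitSemaphoreValueCount = static_cast<std::uint32_t>(wait_values.size()),
        .pWaitSemaphoreValues = wait_values.data(),
        .signalSemaphoreValueCount = static_cast<std::uint32_t>(signal_values.size()),
        .pSignalSemaphoreValues = signal_values.data(),
    };
    const VkSubmitInfo submit_info{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext = &timeline_si,
        .waitSemaphoreCount = static_cast<std::uint32_t>(wait_semaphores.size()),
        .pWaitSemaphores = wait_semaphores.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = static_cast<std::uint32_t>(signal_semaphores.size()),
        .pSignalSemaphores = signal_semaphores.data(),
    };
    vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
}
```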
sentenced_images.Tick(); @@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + image.lru_index = lru_cache.Insert(image_id, frame_tick); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { @@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); + lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( u64 page, @@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - image.frame_tick = frame_tick; + lru_cache.Touch(image.lru_index, frame_tick); } template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" @@ -370,6 +371,12 @@ private: std::vector<ImageId> uncommitted_downloads; std::queue<std::vector<ImageId>> committed_downloads; + struct LRUItemParams { + using ObjectType = ImageId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; + static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; @@ -379,7 +386,6 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; - typename SlotVector<Image>::Iterator deletion_iterator; }; } // namespace VideoCommon diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index c010b9353..24e943e4c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 const u32 unswizzled_offset = slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; - if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset); - offset >= input.size()) { - // TODO(Rodrigo): This is an out of bounds access that should never happen. To - // avoid crashing the emulator, break. - ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size()); - break; - } - u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; const u8* const src = &input[TO_LINEAR ? 
unswizzled_offset : swizzled_offset]; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 377795326..85d292bcc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -818,7 +818,6 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); - ReadGlobalSetting(Settings::values.use_caches_gc); ReadGlobalSetting(Settings::values.bg_red); ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); @@ -1359,7 +1358,6 @@ void Config::SaveRendererValues() { Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); - WriteGlobalSetting(Settings::values.use_caches_gc); WriteGlobalSetting(Settings::values.bg_red); WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 099ddbb7c..43f1887d1 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -156,7 +156,7 @@ <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> - <string>Use disk shader cache</string> + <string>Use disk pipeline cache</string> </property> </widget> </item> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a31b8e192..bfd464061 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -28,7 +28,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); - ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); if (Settings::IsConfiguringGlobal()) { @@ -55,8 +54,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc, - use_caches_gc); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, ui->use_fast_gpu_time, use_fast_gpu_time); } @@ -81,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); - ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( Settings::values.max_anisotropy.UsingGlobal()); @@ -94,8 +90,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { use_asynchronous_shaders); ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, Settings::values.use_fast_gpu_time, use_fast_gpu_time); - ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc, - use_caches_gc); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, 
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 7356e6916..13ba4ff6b 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -37,5 +37,4 @@ private: ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; - ConfigurationShared::CheckState use_caches_gc; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 4fe6b86ae..b91abc2f0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,7 +82,7 @@ <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> </property> <property name="text"> - <string>Use asynchronous shader building (hack)</string> + <string>Use asynchronous shader building (Hack)</string> </property> </widget> </item> @@ -92,17 +92,7 @@ <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> </property> <property name="text"> - <string>Use Fast GPU Time (hack)</string> - </property> - </widget> - </item> - <item> - <widget class="QCheckBox" name="use_caches_gc"> - <property name="toolTip"> - <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string> - </property> - <property name="text"> - <string>Enable GPU cache garbage collection (experimental)</string> + <string>Use Fast GPU Time (Hack)</string> </property> </widget> </item> diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index e97804220..f9d949e75 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location")); QAction* open_transferable_shader_cache = - context_menu.addAction(tr("Open Transferable Shader Cache")); + context_menu.addAction(tr("Open Transferable Pipeline Cache")); context_menu.addSeparator(); QMenu* remove_menu = context_menu.addMenu(tr("Remove")); QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); - QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); - QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); + QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); + QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache")); remove_menu->addSeparator(); - QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); + QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches")); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); 
QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 4f14be524..757dd1ea0 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -468,7 +468,6 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); - ReadSetting("Renderer", Settings::values.use_caches_gc); ReadSetting("Renderer", Settings::values.bg_red); ReadSetting("Renderer", Settings::values.bg_green);
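
One smaller pattern from the vp9_types.h hunk earlier in this diff is worth spelling out: FrameFlags became an enum class, so the raw `(vp9_flags & FrameFlags::IsKeyFrame) != 0` tests were rewritten as `True(vp9_flags & FrameFlags::IsKeyFrame)` using DECLARE_ENUM_FLAG_OPERATORS. The sketch below is a minimal, self-contained stand-in for that machinery (yuzu gets the real versions from its common headers); it only illustrates why the enum-class form still reads naturally at the call sites.

```cpp
// Minimal stand-in for the flag helpers used by the vp9_types.h hunk.
// Illustration only; not yuzu's actual common_funcs.h.
#include <type_traits>

enum class FrameFlags : unsigned {
    IsKeyFrame = 1u << 0,
    LastFrameIsKeyFrame = 1u << 1,
    IntraOnly = 1u << 5,
};

// What DECLARE_ENUM_FLAG_OPERATORS(FrameFlags) provides, in spirit:
constexpr FrameFlags operator&(FrameFlags lhs, FrameFlags rhs) {
    using T = std::underlying_type_t<FrameFlags>;
    return static_cast<FrameFlags>(static_cast<T>(lhs) & static_cast<T>(rhs));
}

// True(x): "is any bit set?", avoiding an explicit cast at every call site.
constexpr bool True(FrameFlags flags) {
    return static_cast<std::underlying_type_t<FrameFlags>>(flags) != 0;
}

constexpr bool IsKeyFrame(FrameFlags vp9_flags) {
    // Equivalent to the old `(vp9_flags & FrameFlags::IsKeyFrame) != 0` on the
    // plain enum, but type-safe now that FrameFlags is an enum class.
    return True(vp9_flags & FrameFlags::IsKeyFrame);
}

static_assert(IsKeyFrame(FrameFlags::IsKeyFrame));
static_assert(!IsKeyFrame(FrameFlags::IntraOnly));
```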