48 files changed, 406 insertions, 206 deletions
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject af2d50288fc537201014c4230bb55ab9018a743
+Subproject 644172477eaf0d822178cb7e96c62b75caa9657
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp
index 9120cc178..4acbff649 100644
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -101,6 +101,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
     SUB(Service, GRC)                                                                              \
     SUB(Service, HID)                                                                              \
     SUB(Service, IRS)                                                                              \
+    SUB(Service, JIT)                                                                              \
     SUB(Service, LBL)                                                                              \
     SUB(Service, LDN)                                                                              \
     SUB(Service, LDR)                                                                              \
diff --git a/src/common/logging/types.h b/src/common/logging/types.h
index f803ab796..99c15fa96 100644
--- a/src/common/logging/types.h
+++ b/src/common/logging/types.h
@@ -69,6 +69,7 @@ enum class Class : u8 {
     Service_GRC,       ///< The game recording service
     Service_HID,       ///< The HID (Human interface device) service
     Service_IRS,       ///< The IRS service
+    Service_JIT,       ///< The JIT service
     Service_LBL,       ///< The LBL (LCD backlight) service
     Service_LDN,       ///< The LDN (Local domain network) service
     Service_LDR,       ///< The loader service
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 7a3f21dcf..7fd9d22f8 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -10,25 +10,49 @@
 #include "common/uint128.h"
 #include "common/x64/native_clock.h"
 
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
 namespace Common {
 
+#ifdef _MSC_VER
+__forceinline static u64 FencedRDTSC() { // MSVC build: TSC read with an LFENCE on each side
+    _mm_lfence(); // LFENCE intrinsic, issued before the counter is read
+    _ReadWriteBarrier(); // compiler-level reordering barrier (MSVC intrinsic)
+    const u64 result = __rdtsc();
+    _mm_lfence(); // second LFENCE, issued after the counter is read
+    _ReadWriteBarrier();
+    return result;
+}
+#else
+static u64 FencedRDTSC() { // other compilers: same sequence as GNU-style inline assembly
+    u64 result;
+    asm volatile("lfence\n\t"
+                 "rdtsc\n\t" // low 32 bits land in eax, high 32 bits in edx
+                 "shl $32, %%rdx\n\t" // shift the high half up into bits 63:32
+                 "or %%rdx, %0\n\t" // merge it into operand 0 (rax)
+                 "lfence"
+                 : "=a"(result) // output operand 0: rax
+                 :
+                 : "rdx", "memory", "cc"); // clobbers: rdx scratch, compiler memory barrier, flags
+    return result;
+}
+#endif
+
 u64 EstimateRDTSCFrequency() {
     // Discard the first result measuring the rdtsc.
-    _mm_mfence();
-    __rdtsc();
+    FencedRDTSC();
     std::this_thread::sleep_for(std::chrono::milliseconds{1});
-    _mm_mfence();
-    __rdtsc();
+    FencedRDTSC();
 
     // Get the current time.
     const auto start_time = std::chrono::steady_clock::now();
-    _mm_mfence();
-    const u64 tsc_start = __rdtsc();
+    const u64 tsc_start = FencedRDTSC();
     // Wait for 200 milliseconds.
     std::this_thread::sleep_for(std::chrono::milliseconds{200});
     const auto end_time = std::chrono::steady_clock::now();
-    _mm_mfence();
-    const u64 tsc_end = __rdtsc();
+    const u64 tsc_end = FencedRDTSC();
     // Calculate differences.
     const u64 timer_diff = static_cast<u64>(
         std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
@@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
                          u64 rtsc_frequency_)
     : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
                                                                                rtsc_frequency_} {
-    _mm_mfence();
-    time_point.inner.last_measure = __rdtsc();
+    time_point.inner.last_measure = FencedRDTSC();
     time_point.inner.accumulated_ticks = 0U;
     ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
     us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
@@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() {
 
     current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
     do {
-        _mm_mfence();
-        const u64 current_measure = __rdtsc();
+        const u64 current_measure = FencedRDTSC();
         u64 diff = current_measure - current_time_point.inner.last_measure;
         diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
         new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
@@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) {
         current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
         do {
             new_time_point.pack = current_time_point.pack;
-            _mm_mfence();
-            new_time_point.inner.last_measure = __rdtsc();
+            new_time_point.inner.last_measure = FencedRDTSC();
         } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
                                                current_time_point.pack, current_time_point.pack));
     }
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 6536d0544..81eaf0942 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -458,6 +458,8 @@ add_library(core STATIC
     hle/service/hid/controllers/touchscreen.h
     hle/service/hid/controllers/xpad.cpp
     hle/service/hid/controllers/xpad.h
+    hle/service/jit/jit.cpp
+    hle/service/jit/jit.h
     hle/service/lbl/lbl.cpp
     hle/service/lbl/lbl.h
     hle/service/ldn/errors.h
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c60322442..dce2f4195 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -171,6 +171,9 @@ public:
     /// Prepare core for thread reschedule (if needed to correctly handle state)
     virtual void PrepareReschedule() = 0;
 
+    /// Signal an interrupt and ask the core to halt as soon as possible.
+    virtual void SignalInterrupt() = 0;
+
     struct BacktraceEntry {
         std::string module;
         u64 address;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 054572445..ab3210d84 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -25,6 +25,9 @@ namespace Core {
 
 using namespace Common::Literals;
 
+constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
+constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
+
 class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
 public:
     explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
@@ -84,15 +87,13 @@ public:
     }
 
     void CallSVC(u32 swi) override {
-        parent.svc_called = true;
         parent.svc_swi = swi;
-        parent.jit->HaltExecution();
+        parent.jit->HaltExecution(svc_call);
     }
 
     void AddTicks(u64 ticks) override {
-        if (parent.uses_wall_clock) {
-            return;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
         // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
         // rough approximation of the amount of executed ticks in the system, it may be thrown off
         // if not all cores are doing a similar amount of work. Instead of doing this, we should
@@ -108,12 +109,8 @@ public:
     }
 
     u64 GetTicksRemaining() override {
-        if (parent.uses_wall_clock) {
-            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-                return minimum_run_cycles;
-            }
-            return 0U;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
         return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
     }
 
@@ -148,6 +145,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
 
     // Timing
     config.wall_clock_cntpct = uses_wall_clock;
+    config.enable_cycle_counting = !uses_wall_clock;
 
     // Code cache size
     config.code_cache_size = 512_MiB;
@@ -230,13 +228,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
 
 void ARM_Dynarmic_32::Run() {
     while (true) {
-        jit->Run();
-        if (!svc_called) {
-            break;
+        const auto hr = jit->Run();
+        if (Has(hr, svc_call)) {
+            Kernel::Svc::Call(system, svc_swi);
         }
-        svc_called = false;
-        Kernel::Svc::Call(system, svc_swi);
-        if (shutdown) {
+        if (Has(hr, break_loop)) {
             break;
         }
     }
@@ -322,8 +318,11 @@ void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
 }
 
 void ARM_Dynarmic_32::PrepareReschedule() {
-    jit->HaltExecution();
-    shutdown = true;
+    jit->HaltExecution(break_loop);
+}
+
+void ARM_Dynarmic_32::SignalInterrupt() {
+    jit->HaltExecution(break_loop);
 }
 
 void ARM_Dynarmic_32::ClearInstructionCache() {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 5d47b600d..3f68a4ff1 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -57,6 +57,7 @@ public:
     void LoadContext(const ThreadContext64& ctx) override {}
 
     void PrepareReschedule() override;
+    void SignalInterrupt() override;
     void ClearExclusiveState() override;
 
     void ClearInstructionCache() override;
@@ -83,9 +84,6 @@ private:
 
     // SVC callback
     u32 svc_swi{};
-    bool svc_called{};
-
-    bool shutdown{};
 };
 
 } // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 7ff8f9495..68822a1fc 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -26,6 +26,9 @@ namespace Core {
 using Vector = Dynarmic::A64::Vector;
 using namespace Common::Literals;
 
+constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
+constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
+
 class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
 public:
     explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
@@ -106,7 +109,7 @@ public:
             break;
         }
 
-        parent.jit->HaltExecution();
+        parent.jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation);
     }
 
     void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
@@ -126,15 +129,12 @@ public:
     }
 
     void CallSVC(u32 swi) override {
-        parent.svc_called = true;
         parent.svc_swi = swi;
-        parent.jit->HaltExecution();
+        parent.jit->HaltExecution(svc_call);
     }
 
     void AddTicks(u64 ticks) override {
-        if (parent.uses_wall_clock) {
-            return;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
 
         // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
         // rough approximation of the amount of executed ticks in the system, it may be thrown off
@@ -149,12 +149,8 @@ public:
     }
 
     u64 GetTicksRemaining() override {
-        if (parent.uses_wall_clock) {
-            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-                return minimum_run_cycles;
-            }
-            return 0U;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
         return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
     }
 
@@ -210,6 +206,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
 
     // Timing
     config.wall_clock_cntpct = uses_wall_clock;
+    config.enable_cycle_counting = !uses_wall_clock;
 
     // Code cache size
     config.code_cache_size = 512_MiB;
@@ -292,13 +289,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
 
 void ARM_Dynarmic_64::Run() {
     while (true) {
-        jit->Run();
-        if (!svc_called) {
-            break;
+        const auto hr = jit->Run();
+        if (Has(hr, svc_call)) {
+            Kernel::Svc::Call(system, svc_swi);
         }
-        svc_called = false;
-        Kernel::Svc::Call(system, svc_swi);
-        if (shutdown) {
+        if (Has(hr, break_loop)) {
             break;
         }
     }
@@ -389,8 +384,11 @@ void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
 }
 
 void ARM_Dynarmic_64::PrepareReschedule() {
-    jit->HaltExecution();
-    shutdown = true;
+    jit->HaltExecution(break_loop);
+}
+
+void ARM_Dynarmic_64::SignalInterrupt() {
+    jit->HaltExecution(break_loop);
 }
 
 void ARM_Dynarmic_64::ClearInstructionCache() {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 0c4e46c64..58bc7fbec 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -51,6 +51,7 @@ public:
     void LoadContext(const ThreadContext64& ctx) override;
 
     void PrepareReschedule() override;
+    void SignalInterrupt() override;
     void ClearExclusiveState() override;
 
     void ClearInstructionCache() override;
@@ -77,9 +78,6 @@ private:
 
     // SVC callback
     u32 svc_swi{};
-    bool svc_called{};
-
-    bool shutdown{};
 };
 
 } // namespace Core
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 42d1b0e31..b547a3463 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -24,8 +24,15 @@
 
 namespace Kernel {
 
-SessionRequestHandler::SessionRequestHandler(KernelCore& kernel_, const char* service_name_)
-    : kernel{kernel_}, service_thread{kernel.CreateServiceThread(service_name_)} {}
+SessionRequestHandler::SessionRequestHandler(KernelCore& kernel_, const char* service_name_,
+                                             ServiceThreadType thread_type)
+    : kernel{kernel_} {
+    // CreateNew asks the kernel for a dedicated service thread named after the service; any
+    // other value reuses the kernel's default service thread.
+    const bool wants_own_thread = thread_type == ServiceThreadType::CreateNew;
+    service_thread = wants_own_thread ? kernel.CreateServiceThread(service_name_)
+                                      : kernel.GetDefaultServiceThread();
+}
 
 SessionRequestHandler::~SessionRequestHandler() {
     kernel.ReleaseServiceThread(service_thread);
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index 670cc741c..640146137 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -33,6 +33,11 @@ namespace Service {
 class ServiceFrameworkBase;
 }
 
+enum class ServiceThreadType {
+    Default,   ///< Use the kernel's shared default service thread
+    CreateNew, ///< Create a dedicated service thread for the handler
+};
+
 namespace Kernel {
 
 class Domain;
@@ -57,7 +62,8 @@ enum class ThreadWakeupReason;
  */
 class SessionRequestHandler : public std::enable_shared_from_this<SessionRequestHandler> {
 public:
-    SessionRequestHandler(KernelCore& kernel, const char* service_name_);
+    SessionRequestHandler(KernelCore& kernel_, const char* service_name_,
+                          ServiceThreadType thread_type);
     virtual ~SessionRequestHandler();
 
     /**
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 34da7c23b..6387d0c29 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -61,6 +61,7 @@ struct KernelCore::Impl {
         global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
         global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
         global_handle_table->Initialize(KHandleTable::MaxTableSize);
+        default_service_thread = CreateServiceThread(kernel, "DefaultServiceThread");
 
         is_phantom_mode_for_singlecore = false;
 
@@ -677,6 +678,12 @@ struct KernelCore::Impl {
 
     void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) {
         if (auto strong_ptr = service_thread.lock()) {
+            if (strong_ptr == default_service_thread.lock()) {
+                // Nothing to do here, the service is using default_service_thread, which will be
+                // released on shutdown.
+                return;
+            }
+
             service_threads_manager.QueueWork(
                 [this, strong_ptr{std::move(strong_ptr)}]() { service_threads.erase(strong_ptr); });
         }
@@ -739,7 +746,8 @@ struct KernelCore::Impl {
     std::unique_ptr<KMemoryLayout> memory_layout;
 
     // Threads used for services
-    std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
+    std::unordered_set<std::shared_ptr<ServiceThread>> service_threads;
+    std::weak_ptr<ServiceThread> default_service_thread;
     Common::ThreadWorker service_threads_manager;
 
     std::array<KThread*, Core::Hardware::NUM_CPU_CORES> suspend_threads;
@@ -1065,6 +1073,10 @@ std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::
     return impl->CreateServiceThread(*this, name);
 }
 
+std::weak_ptr<Kernel::ServiceThread> KernelCore::GetDefaultServiceThread() const {
+    return impl->default_service_thread; // copy of the weak (non-owning) handle held by Impl
+}
+
 void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) {
     impl->ReleaseServiceThread(service_thread);
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 4c68e96df..24e26fa44 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -271,9 +271,11 @@ public:
     void ExitSVCProfile();
 
     /**
-     * Creates an HLE service thread, which are used to execute service routines asynchronously.
-     * While these are allocated per ServerSession, these need to be owned and managed outside
-     * of ServerSession to avoid a circular dependency.
+     * Creates a host thread that executes HLE service requests, i.e. runs service routines
+     * asynchronously. While these are allocated per ServerSession, they need to be owned and
+     * managed outside of ServerSession to avoid a circular dependency. In general, most services
+     * can just use the default service thread and do not need their own host service thread.
+     * See GetDefaultServiceThread.
      * @param name String name for the ServerSession creating this thread, used for debug
      * purposes.
      * @returns The a weak pointer newly created service thread.
@@ -281,6 +283,14 @@ public:
     std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name);
 
     /**
+     * Gets the default host service thread, which executes HLE service requests. Unless service
+     * requests need to block on the host, the default service thread should be used in favor of
+     * creating a new service thread.
+     * @returns A weak pointer to the default service thread.
+     */
+    std::weak_ptr<Kernel::ServiceThread> GetDefaultServiceThread() const;
+
+    /**
      * Releases a HLE service thread, instructing KernelCore to free it. This should be called when
      * the ServerSession associated with the thread is destroyed.
      * @param service_thread Service thread to release.
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 7477668e4..18a5f40f8 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -58,6 +58,7 @@ bool PhysicalCore::IsInterrupted() const {
 void PhysicalCore::Interrupt() {
     guard->lock();
     interrupts[core_index].SetInterrupt(true);
+    arm_interface->SignalInterrupt();
     guard->unlock();
 }
 
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 420de3c54..4d7e5ecd3 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1337,7 +1337,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
         {200, nullptr, "GetLastApplicationExitReason"},
         {500, nullptr, "StartContinuousRecordingFlushForDebug"},
         {1000, nullptr, "CreateMovieMaker"},
-        {1001, nullptr, "PrepareForJit"},
+        {1001, &IApplicationFunctions::PrepareForJit, "PrepareForJit"},
     };
     // clang-format on
 
@@ -1787,6 +1787,13 @@ void IApplicationFunctions::GetHealthWarningDisappearedSystemEvent(Kernel::HLERe
     rb.PushCopyObjects(health_warning_disappeared_system_event->GetReadableEvent());
 }
 
+void IApplicationFunctions::PrepareForJit(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_AM, "(STUBBED) called");
+    // Stub: no request data is read and no work is done; the reply carries only ResultSuccess.
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(ResultSuccess);
+}
+
 void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
                        Core::System& system) {
     auto message_queue = std::make_shared<AppletMessageQueue>(system);
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index fdd937b82..11a3c0459 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -336,6 +336,7 @@ private:
     void TryPopFromFriendInvitationStorageChannel(Kernel::HLERequestContext& ctx);
     void GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx);
     void GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx);
+    void PrepareForJit(Kernel::HLERequestContext& ctx);
 
     KernelHelpers::ServiceContext service_context;
 
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index affa7971c..a72956a28 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -41,9 +41,10 @@ public:
     explicit IAudioOut(Core::System& system_, AudoutParams audio_params_,
                        AudioCore::AudioOut& audio_core_, std::string&& device_name_,
                        std::string&& unique_name)
-        : ServiceFramework{system_, "IAudioOut"}, audio_core{audio_core_},
-          device_name{std::move(device_name_)}, audio_params{audio_params_},
-          main_memory{system.Memory()}, service_context{system_, "IAudioOut"} {
+        : ServiceFramework{system_, "IAudioOut", ServiceThreadType::CreateNew},
+          audio_core{audio_core_}, device_name{std::move(device_name_)},
+          audio_params{audio_params_}, main_memory{system.Memory()}, service_context{system_,
+                                                                                     "IAudioOut"} {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"},
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index f45e5cecc..d4ffeb21d 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -24,7 +24,8 @@ public:
     explicit IAudioRenderer(Core::System& system_,
                             const AudioCommon::AudioRendererParameter& audren_params,
                             const std::size_t instance_number)
-        : ServiceFramework{system_, "IAudioRenderer"}, service_context{system_, "IAudioRenderer"} {
+        : ServiceFramework{system_, "IAudioRenderer", ServiceThreadType::CreateNew},
+          service_context{system_, "IAudioRenderer"} {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, &IAudioRenderer::GetSampleRate, "GetSampleRate"},
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index b087e7bba..c07929ab8 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -58,7 +58,8 @@ enum class FileSystemType : u8 {
 class IStorage final : public ServiceFramework<IStorage> {
 public:
     explicit IStorage(Core::System& system_, FileSys::VirtualFile backend_)
-        : ServiceFramework{system_, "IStorage"}, backend(std::move(backend_)) {
+        : ServiceFramework{system_, "IStorage", ServiceThreadType::CreateNew},
+          backend(std::move(backend_)) {
         static const FunctionInfo functions[] = {
             {0, &IStorage::Read, "Read"},
             {1, nullptr, "Write"},
@@ -116,7 +117,8 @@ private:
 class IFile final : public ServiceFramework<IFile> {
 public:
     explicit IFile(Core::System& system_, FileSys::VirtualFile backend_)
-        : ServiceFramework{system_, "IFile"}, backend(std::move(backend_)) {
+        : ServiceFramework{system_, "IFile", ServiceThreadType::CreateNew},
+          backend(std::move(backend_)) {
         static const FunctionInfo functions[] = {
             {0, &IFile::Read, "Read"},
             {1, &IFile::Write, "Write"},
@@ -252,7 +254,8 @@ static void BuildEntryIndex(std::vector<FileSys::Entry>& entries, const std::vec
 class IDirectory final : public ServiceFramework<IDirectory> {
 public:
     explicit IDirectory(Core::System& system_, FileSys::VirtualDir backend_)
-        : ServiceFramework{system_, "IDirectory"}, backend(std::move(backend_)) {
+        : ServiceFramework{system_, "IDirectory", ServiceThreadType::CreateNew},
+          backend(std::move(backend_)) {
         static const FunctionInfo functions[] = {
             {0, &IDirectory::Read, "Read"},
             {1, &IDirectory::GetEntryCount, "GetEntryCount"},
@@ -308,8 +311,8 @@ private:
 class IFileSystem final : public ServiceFramework<IFileSystem> {
 public:
     explicit IFileSystem(Core::System& system_, FileSys::VirtualDir backend_, SizeGetter size_)
-        : ServiceFramework{system_, "IFileSystem"}, backend{std::move(backend_)}, size{std::move(
-                                                                                      size_)} {
+        : ServiceFramework{system_, "IFileSystem", ServiceThreadType::CreateNew},
+          backend{std::move(backend_)}, size{std::move(size_)} {
         static const FunctionInfo functions[] = {
             {0, &IFileSystem::CreateFile, "CreateFile"},
             {1, &IFileSystem::DeleteFile, "DeleteFile"},
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 92e6bf889..b2cec2253 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -63,6 +63,10 @@ IAppletResource::IAppletResource(Core::System& system_,
     MakeController<Controller_Gesture>(HidController::Gesture);
     MakeController<Controller_ConsoleSixAxis>(HidController::ConsoleSixAxisSensor);
 
+    // Homebrew doesn't try to activate some controllers, so we activate them by default
+    GetController<Controller_NPad>(HidController::NPad).ActivateController();
+    GetController<Controller_Touchscreen>(HidController::Touchscreen).ActivateController();
+
     GetController<Controller_Stubbed>(HidController::HomeButton).SetCommonHeaderOffset(0x4C00);
     GetController<Controller_Stubbed>(HidController::SleepButton).SetCommonHeaderOffset(0x4E00);
     GetController<Controller_Stubbed>(HidController::CaptureButton).SetCommonHeaderOffset(0x5000);
diff --git a/src/core/hle/service/jit/jit.cpp b/src/core/hle/service/jit/jit.cpp
new file mode 100644
index 000000000..c8ebd2e3f
--- /dev/null
+++ b/src/core/hle/service/jit/jit.cpp
@@ -0,0 +1,53 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/result.h"
+#include "core/hle/service/jit/jit.h"
+#include "core/hle/service/service.h"
+
+namespace Service::JIT {
+
+class IJitEnvironment final : public ServiceFramework<IJitEnvironment> {
+public:
+    explicit IJitEnvironment(Core::System& system_) : ServiceFramework{system_, "IJitEnvironment"} {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, nullptr, "GenerateCode"},
+            {1, nullptr, "Control"},
+            {1000, nullptr, "LoadPlugin"},
+            {1001, nullptr, "GetCodeAddress"},
+        };
+        // clang-format on
+        // All four commands above are registered by id and name only; their handler is nullptr.
+        RegisterHandlers(functions);
+    }
+};
+
+class JITU final : public ServiceFramework<JITU> { // HLE service constructed as "jit:u"
+public:
+    explicit JITU(Core::System& system_) : ServiceFramework{system_, "jit:u"} {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, &JITU::CreateJitEnvironment, "CreateJitEnvironment"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+
+    void CreateJitEnvironment(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_JIT, "called");
+        // Reply with ResultSuccess followed by a newly pushed IJitEnvironment interface.
+        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+        rb.Push(ResultSuccess);
+        rb.PushIpcInterface<IJitEnvironment>(system);
+    }
+};
+
+void InstallInterfaces(SM::ServiceManager& sm, Core::System& system) {
+    std::make_shared<JITU>(system)->InstallAsService(sm); // JITU is the only service installed
+}
+
+} // namespace Service::JIT
diff --git a/src/core/hle/service/jit/jit.h b/src/core/hle/service/jit/jit.h
new file mode 100644
index 000000000..8fbf504a1
--- /dev/null
+++ b/src/core/hle/service/jit/jit.h
@@ -0,0 +1,20 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Core {
+class System;
+}
+
+namespace Service::SM {
+class ServiceManager;
+}
+
+namespace Service::JIT {
+
+/// Registers all JIT services with the specified service manager.
+void InstallInterfaces(SM::ServiceManager& sm, Core::System& system);
+
+} // namespace Service::JIT
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index 1ce2a856b..8467b50e4 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -230,7 +230,7 @@ void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
 }
 
 NVDRV::NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char* name)
-    : ServiceFramework{system_, name}, nvdrv{std::move(nvdrv_)} {
+    : ServiceFramework{system_, name, ServiceThreadType::CreateNew}, nvdrv{std::move(nvdrv_)} {
     static const FunctionInfo functions[] = {
         {0, &NVDRV::Open, "Open"},
         {1, &NVDRV::Ioctl1, "Ioctl"},
diff --git a/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp b/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
index 41fbba219..c527c577e 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
@@ -18,8 +18,7 @@ BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_)
 BufferQueueConsumer::~BufferQueueConsumer() = default;
 
 Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
-                                          std::chrono::nanoseconds expected_present,
-                                          u64 max_frame_number) {
+                                          std::chrono::nanoseconds expected_present) {
     std::scoped_lock lock(core->mutex);
 
     // Check that the consumer doesn't currently have the maximum number of buffers acquired.
@@ -50,12 +49,6 @@ Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
         while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
             const auto& buffer_item{core->queue[1]};
 
-            // If dropping entry[0] would leave us with a buffer that the consumer is not yet ready
-            // for, don't drop it.
-            if (max_frame_number && buffer_item.frame_number > max_frame_number) {
-                break;
-            }
-
             // If entry[1] is timely, drop entry[0] (and repeat).
             const auto desired_present = buffer_item.timestamp;
             if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
@@ -200,4 +193,39 @@ Status BufferQueueConsumer::Connect(std::shared_ptr<IConsumerListener> consumer_
     return Status::NoError;
 }
 
+Status BufferQueueConsumer::GetReleasedBuffers(u64* out_slot_mask) {
+    // The caller must provide storage for the resulting slot mask.
+    if (out_slot_mask == nullptr) {
+        LOG_ERROR(Service_NVFlinger, "out_slot_mask may not be nullptr");
+        return Status::BadValue;
+    }
+
+    std::scoped_lock lock(core->mutex);
+
+    if (core->is_abandoned) {
+        LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
+        return Status::NoInit;
+    }
+
+    // Start with one bit set for every slot on which acquire has not been called.
+    u64 released_mask{};
+    for (int slot_index = 0; slot_index < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot_index) {
+        if (!slots[slot_index].acquire_called) {
+            released_mask |= (1ULL << slot_index);
+        }
+    }
+
+    // Remove from the mask queued buffers for which acquire has been called, since the consumer
+    // will not receive their buffer addresses and so must retain their cached information
+    for (const auto& queued_item : core->queue) {
+        if (queued_item.acquire_called) {
+            released_mask &= ~(1ULL << queued_item.slot);
+        }
+    }
+
+    LOG_DEBUG(Service_NVFlinger, "returning mask {}", released_mask);
+    *out_slot_mask = released_mask;
+    return Status::NoError;
+}
+
 } // namespace Service::android
diff --git a/src/core/hle/service/nvflinger/buffer_queue_consumer.h b/src/core/hle/service/nvflinger/buffer_queue_consumer.h
index f22854394..8a047fe06 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_consumer.h
+++ b/src/core/hle/service/nvflinger/buffer_queue_consumer.h
@@ -24,10 +24,10 @@ public:
     explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_);
     ~BufferQueueConsumer();
 
-    Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present,
-                         u64 max_frame_number = 0);
+    Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present);
     Status ReleaseBuffer(s32 slot, u64 frame_number, const Fence& release_fence);
     Status Connect(std::shared_ptr<IConsumerListener> consumer_listener, bool controlled_by_app);
+    Status GetReleasedBuffers(u64* out_slot_mask);
 
 private:
     std::shared_ptr<BufferQueueCore> core;
diff --git a/src/core/hle/service/nvflinger/buffer_queue_core.cpp b/src/core/hle/service/nvflinger/buffer_queue_core.cpp
index 6082610e0..3a0481786 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_core.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_core.cpp
@@ -95,7 +95,6 @@ void BufferQueueCore::FreeBufferLocked(s32 slot) {
 }
 
 void BufferQueueCore::FreeAllBuffersLocked() {
-    queue.clear();
     buffer_has_been_queued = false;
 
     for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) {
diff --git a/src/core/hle/service/nvflinger/buffer_queue_core.h b/src/core/hle/service/nvflinger/buffer_queue_core.h
index 4dfd53387..e4e0937cb 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_core.h
+++ b/src/core/hle/service/nvflinger/buffer_queue_core.h
@@ -73,8 +73,6 @@ private:
     u32 transform_hint{};
     bool is_allocating{};
     mutable std::condition_variable_any is_allocating_condition;
-    bool allow_allocation{true};
-    u64 buffer_age{};
     bool is_shutting_down{};
 };
 
diff --git a/src/core/hle/service/nvflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
index 0833be57a..3d6e990c3 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
@@ -62,11 +62,12 @@ Status BufferQueueProducer::RequestBuffer(s32 slot, std::shared_ptr<GraphicBuffe
 
 Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
     LOG_DEBUG(Service_NVFlinger, "count = {}", buffer_count);
-    std::shared_ptr<IConsumerListener> listener;
 
+    std::shared_ptr<IConsumerListener> listener;
     {
         std::scoped_lock lock(core->mutex);
         core->WaitWhileAllocatingLocked();
+
         if (core->is_abandoned) {
             LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
             return Status::NoInit;
@@ -120,7 +121,7 @@ Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
 }
 
 Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
-                                                      Status* returnFlags) const {
+                                                      Status* return_flags) const {
     bool try_again = true;
 
     while (try_again) {
@@ -142,10 +143,12 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
             ASSERT(slots[s].buffer_state == BufferState::Free);
             if (slots[s].graphic_buffer != nullptr) {
                 core->FreeBufferLocked(s);
-                *returnFlags |= Status::ReleaseAllBuffers;
+                *return_flags |= Status::ReleaseAllBuffers;
             }
         }
 
+        // Look for a free buffer to give to the client
+        *found = BufferQueueCore::INVALID_BUFFER_SLOT;
         s32 dequeued_count{};
         s32 acquired_count{};
         for (s32 s{}; s < max_buffer_count; ++s) {
@@ -235,68 +238,50 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
     {
         std::scoped_lock lock(core->mutex);
         core->WaitWhileAllocatingLocked();
+
         if (format == PixelFormat::NoFormat) {
             format = core->default_buffer_format;
         }
 
         // Enable the usage bits the consumer requested
         usage |= core->consumer_usage_bit;
-        const bool use_default_size = !width && !height;
-        if (use_default_size) {
-            width = core->default_width;
-            height = core->default_height;
+
+        s32 found{};
+        Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags);
+        if (status != Status::NoError) {
+            return status;
         }
 
-        s32 found = BufferItem::INVALID_BUFFER_SLOT;
-        while (found == BufferItem::INVALID_BUFFER_SLOT) {
-            Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags);
-            if (status != Status::NoError) {
-                return status;
-            }
+        // This should not happen
+        if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
+            LOG_ERROR(Service_NVFlinger, "no available buffer slots");
+            return Status::Busy;
+        }
 
-            // This should not happen
-            if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
-                LOG_DEBUG(Service_NVFlinger, "no available buffer slots");
-                return Status::Busy;
-            }
+        *out_slot = found;
 
-            const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
+        attached_by_consumer = slots[found].attached_by_consumer;
 
-            // If we are not allowed to allocate new buffers, WaitForFreeSlotThenRelock must have
-            // returned a slot containing a buffer. If this buffer would require reallocation to
-            // meet the requested attributes, we free it and attempt to get another one.
-            if (!core->allow_allocation) {
-                if (buffer->NeedsReallocation(width, height, format, usage)) {
-                    core->FreeBufferLocked(found);
-                    found = BufferItem::INVALID_BUFFER_SLOT;
-                    continue;
-                }
-            }
+        const bool use_default_size = !width && !height;
+        if (use_default_size) {
+            width = core->default_width;
+            height = core->default_height;
         }
 
-        *out_slot = found;
-        attached_by_consumer = slots[found].attached_by_consumer;
         slots[found].buffer_state = BufferState::Dequeued;
 
         const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
-
-        if ((buffer == nullptr) || buffer->NeedsReallocation(width, height, format, usage)) {
+        if ((buffer == nullptr) || (buffer->Width() != width) || (buffer->Height() != height) ||
+            (buffer->Format() != format) || ((buffer->Usage() & usage) != usage)) {
             slots[found].acquire_called = false;
             slots[found].graphic_buffer = nullptr;
             slots[found].request_buffer_called = false;
             slots[found].fence = Fence::NoFence();
-            core->buffer_age = 0;
+
             return_flags |= Status::BufferNeedsReallocation;
-        } else {
-            // We add 1 because that will be the frame number when this buffer
-            // is queued
-            core->buffer_age = core->frame_counter + 1 - slots[found].frame_number;
         }
 
-        LOG_DEBUG(Service_NVFlinger, "setting buffer age to {}", core->buffer_age);
-
         *out_fence = slots[found].fence;
-
         slots[found].fence = Fence::NoFence();
     }
 
@@ -311,6 +296,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
 
         {
             std::scoped_lock lock(core->mutex);
+
             if (core->is_abandoned) {
                 LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
                 return Status::NoInit;
@@ -327,6 +313,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
 
     LOG_DEBUG(Service_NVFlinger, "returning slot={} frame={}, flags={}", *out_slot,
               slots[*out_slot].frame_number, return_flags);
+
     return return_flags;
 }
 
@@ -334,6 +321,7 @@ Status BufferQueueProducer::DetachBuffer(s32 slot) {
     LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
 
     std::scoped_lock lock(core->mutex);
+
     if (core->is_abandoned) {
         LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
         return Status::NoInit;
@@ -369,7 +357,6 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
     }
 
     std::scoped_lock lock(core->mutex);
-
     core->WaitWhileAllocatingLocked();
 
     if (core->is_abandoned) {
@@ -423,6 +410,7 @@ Status BufferQueueProducer::AttachBuffer(s32* out_slot,
         return status;
     }
 
+    // This should not happen
     if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
         LOG_ERROR(Service_NVFlinger, "No available buffer slots");
         return Status::Busy;
@@ -466,8 +454,8 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
         return Status::BadValue;
     }
 
-    std::shared_ptr<IConsumerListener> frameAvailableListener;
-    std::shared_ptr<IConsumerListener> frameReplacedListener;
+    std::shared_ptr<IConsumerListener> frame_available_listener;
+    std::shared_ptr<IConsumerListener> frame_replaced_listener;
     s32 callback_ticket{};
     BufferItem item;
 
@@ -541,12 +529,13 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
         item.fence = fence;
         item.is_droppable = core->dequeue_buffer_cannot_block || async;
         item.swap_interval = swap_interval;
+
         sticky_transform = sticky_transform_;
 
         if (core->queue.empty()) {
             // When the queue is empty, we can simply queue this buffer
             core->queue.push_back(item);
-            frameAvailableListener = core->consumer_listener;
+            frame_available_listener = core->consumer_listener;
         } else {
             // When the queue is not empty, we need to look at the front buffer
             // state to see if we need to replace it
@@ -563,10 +552,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
                 }
                 // Overwrite the droppable buffer with the incoming one
                 *front = item;
-                frameReplacedListener = core->consumer_listener;
+                frame_replaced_listener = core->consumer_listener;
             } else {
                 core->queue.push_back(item);
-                frameAvailableListener = core->consumer_listener;
+                frame_available_listener = core->consumer_listener;
             }
         }
 
@@ -592,10 +581,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
             callback_condition.wait(callback_mutex);
         }
 
-        if (frameAvailableListener != nullptr) {
-            frameAvailableListener->OnFrameAvailable(item);
-        } else if (frameReplacedListener != nullptr) {
-            frameReplacedListener->OnFrameReplaced(item);
+        if (frame_available_listener != nullptr) {
+            frame_available_listener->OnFrameAvailable(item);
+        } else if (frame_replaced_listener != nullptr) {
+            frame_replaced_listener->OnFrameReplaced(item);
         }
 
         ++current_callback_ticket;
@@ -669,13 +658,6 @@ Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
     case NativeWindow::ConsumerUsageBits:
         value = core->consumer_usage_bit;
         break;
-    case NativeWindow::BufferAge:
-        if (core->buffer_age > INT32_MAX) {
-            value = 0;
-        } else {
-            value = static_cast<u32>(core->buffer_age);
-        }
-        break;
     default:
         UNREACHABLE();
         return Status::BadValue;
@@ -737,7 +719,6 @@ Status BufferQueueProducer::Connect(const std::shared_ptr<IProducerListener>& li
     core->buffer_has_been_queued = false;
     core->dequeue_buffer_cannot_block =
         core->consumer_controlled_by_app && producer_controlled_by_app;
-    core->allow_allocation = true;
 
     return status;
 }
@@ -770,7 +751,7 @@ Status BufferQueueProducer::Disconnect(NativeWindowApi api) {
                 core->SignalDequeueCondition();
                 buffer_wait_event->GetWritableEvent().Signal();
                 listener = core->consumer_listener;
-            } else if (core->connected_api != NativeWindowApi::NoConnectedApi) {
+            } else {
                 LOG_ERROR(Service_NVFlinger, "still connected to another api (cur = {} req = {})",
                           core->connected_api, api);
                 status = Status::BadValue;
diff --git a/src/core/hle/service/nvflinger/buffer_queue_producer.h b/src/core/hle/service/nvflinger/buffer_queue_producer.h
index 77fdcae8e..c4ca68fd3 100644
--- a/src/core/hle/service/nvflinger/buffer_queue_producer.h
+++ b/src/core/hle/service/nvflinger/buffer_queue_producer.h
@@ -66,7 +66,7 @@ public:
 private:
     BufferQueueProducer(const BufferQueueProducer&) = delete;
 
-    Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* returnFlags) const;
+    Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* return_flags) const;
 
     Kernel::KEvent* buffer_wait_event{};
     Service::KernelHelpers::ServiceContext& service_context;
diff --git a/src/core/hle/service/nvflinger/consumer_base.cpp b/src/core/hle/service/nvflinger/consumer_base.cpp
index be65a3f88..c2c80832c 100644
--- a/src/core/hle/service/nvflinger/consumer_base.cpp
+++ b/src/core/hle/service/nvflinger/consumer_base.cpp
@@ -36,38 +36,41 @@ void ConsumerBase::FreeBufferLocked(s32 slot_index) {
 }
 
 void ConsumerBase::OnFrameAvailable(const BufferItem& item) {
-    std::scoped_lock lock(mutex);
     LOG_DEBUG(Service_NVFlinger, "called");
 }
 
 void ConsumerBase::OnFrameReplaced(const BufferItem& item) {
-    std::scoped_lock lock(mutex);
     LOG_DEBUG(Service_NVFlinger, "called");
 }
 
 void ConsumerBase::OnBuffersReleased() {
     std::scoped_lock lock(mutex);
-    LOG_DEBUG(Service_NVFlinger, "called");
-}
 
-void ConsumerBase::OnSidebandStreamChanged() {}
+    LOG_DEBUG(Service_NVFlinger, "called");
 
-Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when,
-                                         u64 max_frame_number) {
     if (is_abandoned) {
-        LOG_ERROR(Service_NVFlinger, "consumer is abandoned!");
-        return Status::NoInit;
+        // Nothing to do if we're already abandoned.
+        return;
     }
 
-    Status err = consumer->AcquireBuffer(item, present_when, max_frame_number);
+    u64 mask = 0;
+    consumer->GetReleasedBuffers(&mask);
+    for (int i = 0; i < BufferQueueDefs::NUM_BUFFER_SLOTS; i++) {
+        if (mask & (1ULL << i)) {
+            FreeBufferLocked(i);
+        }
+    }
+}
+
+void ConsumerBase::OnSidebandStreamChanged() {}
+
+Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when) {
+    Status err = consumer->AcquireBuffer(item, present_when);
     if (err != Status::NoError) {
         return err;
     }
 
     if (item->graphic_buffer != nullptr) {
-        if (slots[item->slot].graphic_buffer != nullptr) {
-            FreeBufferLocked(item->slot);
-        }
         slots[item->slot].graphic_buffer = item->graphic_buffer;
     }
 
diff --git a/src/core/hle/service/nvflinger/consumer_base.h b/src/core/hle/service/nvflinger/consumer_base.h
index 9ab949420..736080e3a 100644
--- a/src/core/hle/service/nvflinger/consumer_base.h
+++ b/src/core/hle/service/nvflinger/consumer_base.h
@@ -35,8 +35,7 @@ protected:
     virtual void OnSidebandStreamChanged() override;
 
     void FreeBufferLocked(s32 slot_index);
-    Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when,
-                               u64 max_frame_number = 0);
+    Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when);
     Status ReleaseBufferLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer);
     bool StillTracking(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer) const;
     Status AddReleaseFenceLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer,
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 76ce1fbfd..6fb2cdff1 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -104,7 +104,7 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
 std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
     const auto lock_guard = Lock();
 
-    LOG_DEBUG(Service, "Opening \"{}\" display", name);
+    LOG_DEBUG(Service_NVFlinger, "Opening \"{}\" display", name);
 
     const auto itr =
         std::find_if(displays.begin(), displays.end(),
@@ -219,7 +219,7 @@ VI::Layer* NVFlinger::FindOrCreateLayer(u64 display_id, u64 layer_id) {
     auto* layer = display->FindLayer(layer_id);
 
     if (layer == nullptr) {
-        LOG_DEBUG(Service, "Layer at id {} not found. Trying to create it.", layer_id);
+        LOG_DEBUG(Service_NVFlinger, "Layer at id {} not found. Trying to create it.", layer_id);
         CreateLayerAtId(*display, layer_id);
         return display->FindLayer(layer_id);
     }
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index ab3286db9..0f59a03c5 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -32,6 +32,7 @@
 #include "core/hle/service/glue/glue.h"
 #include "core/hle/service/grc/grc.h"
 #include "core/hle/service/hid/hid.h"
+#include "core/hle/service/jit/jit.h"
 #include "core/hle/service/lbl/lbl.h"
 #include "core/hle/service/ldn/ldn.h"
 #include "core/hle/service/ldr/ldr.h"
@@ -91,8 +92,9 @@ namespace Service {
 }
 
 ServiceFrameworkBase::ServiceFrameworkBase(Core::System& system_, const char* service_name_,
-                                           u32 max_sessions_, InvokerFn* handler_invoker_)
-    : SessionRequestHandler(system_.Kernel(), service_name_), system{system_},
+                                           ServiceThreadType thread_type, u32 max_sessions_,
+                                           InvokerFn* handler_invoker_)
+    : SessionRequestHandler(system_.Kernel(), service_name_, thread_type), system{system_},
       service_name{service_name_}, max_sessions{max_sessions_}, handler_invoker{handler_invoker_} {}
 
 ServiceFrameworkBase::~ServiceFrameworkBase() {
@@ -261,6 +263,7 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system
     Glue::InstallInterfaces(system);
     GRC::InstallInterfaces(*sm, system);
     HID::InstallInterfaces(*sm, system);
+    JIT::InstallInterfaces(*sm, system);
     LBL::InstallInterfaces(*sm, system);
     LDN::InstallInterfaces(*sm, system);
     LDR::InstallInterfaces(*sm, system);
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index b9ab2c465..c78b2baeb 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -114,7 +114,8 @@ private:
                            Kernel::HLERequestContext& ctx);
 
     explicit ServiceFrameworkBase(Core::System& system_, const char* service_name_,
-                                  u32 max_sessions_, InvokerFn* handler_invoker_);
+                                  ServiceThreadType thread_type, u32 max_sessions_,
+                                  InvokerFn* handler_invoker_);
     ~ServiceFrameworkBase() override;
 
     void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n);
@@ -176,14 +177,17 @@ protected:
     /**
      * Initializes the handler with no functions installed.
      *
-     * @param system_       The system context to construct this service under.
+     * @param system_ The system context to construct this service under.
      * @param service_name_ Name of the service.
-     * @param max_sessions_ Maximum number of sessions that can be
-     *                      connected to this service at the same time.
+     * @param thread_type Specifies the thread type for this service. If this is set to CreateNew,
+     *                    it creates a new thread for it, otherwise this uses the default thread.
+     * @param max_sessions_ Maximum number of sessions that can be connected to this service at the
+     * same time.
      */
     explicit ServiceFramework(Core::System& system_, const char* service_name_,
+                              ServiceThreadType thread_type = ServiceThreadType::Default,
                               u32 max_sessions_ = ServerSessionCountMax)
-        : ServiceFrameworkBase(system_, service_name_, max_sessions_, Invoker) {}
+        : ServiceFrameworkBase(system_, service_name_, thread_type, max_sessions_, Invoker) {}
 
     /// Registers handlers in the service.
     template <std::size_t N>
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 695a1faa6..97f895852 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -206,7 +206,7 @@ void SM::UnregisterService(Kernel::HLERequestContext& ctx) {
 }
 
 SM::SM(ServiceManager& service_manager_, Core::System& system_)
-    : ServiceFramework{system_, "sm:", 4},
+    : ServiceFramework{system_, "sm:", ServiceThreadType::Default, 4},
       service_manager{service_manager_}, kernel{system_.Kernel()} {
     RegisterHandlers({
         {0, &SM::Initialize, "Initialize"},
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index fc93fb743..d6702e4e1 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -837,7 +837,8 @@ void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) co
     rb.PushEnum(bsd_errno);
 }
 
-BSD::BSD(Core::System& system_, const char* name) : ServiceFramework{system_, name} {
+BSD::BSD(Core::System& system_, const char* name)
+    : ServiceFramework{system_, name, ServiceThreadType::CreateNew} {
     // clang-format off
     static const FunctionInfo functions[] = {
         {0, &BSD::RegisterClient, "RegisterClient"},
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 430cbc546..a3436c8ea 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -77,7 +77,8 @@ static_assert(sizeof(NativeWindow) == 0x28, "NativeWindow has wrong size");
 class IHOSBinderDriver final : public ServiceFramework<IHOSBinderDriver> {
 public:
     explicit IHOSBinderDriver(Core::System& system_, NVFlinger::HosBinderDriverServer& server_)
-        : ServiceFramework{system_, "IHOSBinderDriver"}, server(server_) {
+        : ServiceFramework{system_, "IHOSBinderDriver", ServiceThreadType::CreateNew},
+          server(server_) {
         static const FunctionInfo functions[] = {
             {0, &IHOSBinderDriver::TransactParcel, "TransactParcel"},
             {1, &IHOSBinderDriver::AdjustRefcount, "AdjustRefcount"},
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 0c1fbc7b1..282668b36 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -35,6 +35,15 @@ std::string_view OutputVertexIndex(EmitContext& ctx) {
     return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
 }
 
+std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
+    if (!binding.IsImmediate()) {
+        // Non-immediate binding: emit a call to GetCbufIndirect with the binding variable.
+        const auto binding_expr{ctx.var_alloc.Consume(binding)};
+        return fmt::format("GetCbufIndirect({},{})", binding_expr, index);
+    }
+    return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index);
+}
+
 void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
              const IR::Value& offset, u32 num_bits, std::string_view cast = {},
              std::string_view bit_offset = {}) {
@@ -55,8 +64,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
     const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
                                     : fmt::format("[({}>>2)%4]", offset_var)};
 
-    const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
-    const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
+    const auto cbuf{ChooseCbuf(ctx, binding, index)};
+    const auto cbuf_cast{fmt::format("{}({}{{}})", cast, cbuf)};
     const auto extraction{num_bits == 32 ? cbuf_cast
                                          : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
                                                        bit_offset, num_bits)};
@@ -140,9 +149,9 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
 
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                       const IR::Value& offset) {
-    const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
     const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
     if (offset.IsImmediate()) {
+        const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
         static constexpr u32 cbuf_size{0x10000};
         const u32 u32_offset{offset.U32()};
         const s32 signed_offset{static_cast<s32>(offset.U32())};
@@ -162,17 +171,17 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
         return;
     }
     const auto offset_var{ctx.var_alloc.Consume(offset)};
+    const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))};
     if (!ctx.profile.has_gl_component_indexing_bug) {
-        ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
-                     cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
+        ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf,
+                     offset_var, cast, cbuf, offset_var);
         return;
     }
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
     const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
     for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
-        ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
-                swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
-                "xyzw"[(swizzle + 1) % 4]);
+        ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast,
+                cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]);
     }
 }
 
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index e816a93ec..17266f40d 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -359,6 +359,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
         header += "layout(location=0) uniform vec4 scaling;";
     }
     DefineConstantBuffers(bindings);
+    DefineConstantBufferIndirect();
     DefineStorageBuffers(bindings);
     SetupImages(bindings);
     SetupTextures(bindings);
@@ -436,6 +437,24 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
     }
 }
 
+void EmitContext::DefineConstantBufferIndirect() {
+    if (!info.uses_cbuf_indirect) {
+        return;
+    }
+    // Emit a GLSL helper that picks a constant buffer from a binding index given at run time.
+    const char* const return_type{profile.has_gl_cbuf_ftou_bug ? "uvec4 " : "vec4 "};
+    header += return_type;
+    header += "GetCbufIndirect(uint binding, uint offset){switch(binding){default:";
+
+    // One `case` per known descriptor; `default:` is placed directly before the first of them.
+    for (const auto& desc : info.constant_buffer_descriptors) {
+        const auto cbuf_index{desc.index};
+        header +=
+            fmt::format("case {}:return {}_cbuf{}[offset];", cbuf_index, stage_name, cbuf_index);
+    }
+    header += "}}";
+}
+
 void EmitContext::DefineStorageBuffers(Bindings& bindings) {
     if (info.storage_buffers_descriptors.empty()) {
         return;
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.h b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
index d9b639d29..2b13db6e6 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.h
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
@@ -162,6 +162,7 @@ public:
 private:
     void SetupExtensions();
     void DefineConstantBuffers(Bindings& bindings);
+    void DefineConstantBufferIndirect();
     void DefineStorageBuffers(Bindings& bindings);
     void DefineGenericOutput(size_t index, u32 invocations);
     void DefineHelperFunctions();
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 28f6a6184..9c83cd2e4 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1043,15 +1043,15 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
         const Id merge_label{OpLabel()};
         const Id uniform_type{uniform_types.*member_ptr};
 
-        std::array<Id, Info::MAX_CBUFS> buf_labels;
-        std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals;
-        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+        std::array<Id, Info::MAX_INDIRECT_CBUFS> buf_labels;
+        std::array<Sirit::Literal, Info::MAX_INDIRECT_CBUFS> buf_literals;
+        for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
             buf_labels[i] = OpLabel();
             buf_literals[i] = Sirit::Literal{i};
         }
         OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
         OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
-        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+        for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
             AddLabel(buf_labels[i]);
             const Id cbuf{cbufs[i].*member_ptr};
             const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
@@ -1064,22 +1064,23 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
         return func;
     }};
     IR::Type types{info.used_indirect_cbuf_types};
-    if (True(types & IR::Type::U8)) {
+    bool supports_aliasing = profile.support_descriptor_aliasing;
+    if (supports_aliasing && True(types & IR::Type::U8)) {
         load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
     }
-    if (True(types & IR::Type::U16)) {
+    if (supports_aliasing && True(types & IR::Type::U16)) {
         load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
     }
-    if (True(types & IR::Type::F32)) {
+    if (supports_aliasing && True(types & IR::Type::F32)) {
         load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
     }
-    if (True(types & IR::Type::U32)) {
+    if (supports_aliasing && True(types & IR::Type::U32)) {
         load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
     }
-    if (True(types & IR::Type::U32x2)) {
+    if (supports_aliasing && True(types & IR::Type::U32x2)) {
         load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
     }
-    if (True(types & IR::Type::U32x4)) {
+    if (!supports_aliasing || True(types & IR::Type::U32x4)) {
         load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
     }
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 0b2c60842..16278faab 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -32,13 +32,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
 void AddRegisterIndexedLdc(Info& info) {
     info.uses_cbuf_indirect = true;
 
-    // The shader can use any possible constant buffer
-    info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1;
-
-    auto& cbufs{info.constant_buffer_descriptors};
-    cbufs.clear();
-    for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
-        cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1});
+    for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
+        AddConstantBufferDescriptor(info, i, 1);
 
         // The shader can use any possible access size
         info.constant_buffer_used_sizes[i] = 0x10'000;
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 9d36bd9eb..a3a09c71c 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -105,6 +105,7 @@ struct ImageDescriptor {
 using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
 
 struct Info {
+    static constexpr size_t MAX_INDIRECT_CBUFS{14};
     static constexpr size_t MAX_CBUFS{18};
     static constexpr size_t MAX_SSBOS{32};
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e6f9ece8b..7ab7f0c0a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -520,6 +520,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
     // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
 
+    screen_info.texture.width = image_view->size.width;
+    screen_info.texture.height = image_view->size.height;
     screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
     screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
     return true;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f8f29013a..3a3c213bb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -208,6 +208,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
     // Framebuffer orientation handling
     framebuffer_transform_flags = framebuffer.transform_flags;
     framebuffer_crop_rect = framebuffer.crop_rect;
+    framebuffer_width = framebuffer.width;
+    framebuffer_height = framebuffer.height;
 
     const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
     screen_info.was_accelerated =
@@ -480,9 +482,12 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     ASSERT_MSG(framebuffer_crop_rect.top == 0, "Unimplemented");
     ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");
 
+    f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
+    f32 scale_v =
+        static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
+
     // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
     // (e.g. handheld mode) on a 1920x1080 framebuffer.
-    f32 scale_u = 1.f, scale_v = 1.f;
     if (framebuffer_crop_rect.GetWidth() > 0) {
         scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
                   static_cast<f32>(screen_info.texture.width);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index aa206878b..ae9558a33 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -137,6 +137,8 @@ private:
     /// Used for transforming the framebuffer orientation
     Service::android::BufferTransformFlags framebuffer_transform_flags{};
     Common::Rectangle<int> framebuffer_crop_rect;
+    u32 framebuffer_width{}; ///< Guest framebuffer width, assigned in LoadFBToScreenInfo
+    u32 framebuffer_height{}; ///< Guest framebuffer height, assigned in LoadFBToScreenInfo
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index d893c1952..b866e9103 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -1406,8 +1406,9 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
     UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
     UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
 
-    f32 scale_u = 1.0f;
-    f32 scale_v = 1.0f;
+    f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
+    f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
+
     // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
     // (e.g. handheld mode) on a 1920x1080 framebuffer.
     if (!fsr) {