28 files changed, 571 insertions, 216 deletions
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 16d528994..59da33f30 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -22,10 +22,16 @@ public:
         std::array<u64, 31> cpu_registers;
         u64 sp;
         u64 pc;
-        u64 pstate;
+        u32 pstate;
+        std::array<u8, 4> padding;
         std::array<u128, 32> vector_registers;
-        u64 fpcr;
+        u32 fpcr;
+        u32 fpsr;
+        u64 tpidr;
     };
+    // Internally within the kernel, it expects the AArch64 version of the
+    // thread context to be 800 bytes in size.
+    static_assert(sizeof(ThreadContext) == 0x320);
 
     /// Runs the CPU until an event happens
     virtual void Run() = 0;
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 8cad070b4..05cc84458 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -130,7 +130,7 @@ public:
 
 std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
     auto& current_process = Core::CurrentProcess();
-    auto** const page_table = current_process->vm_manager.page_table.pointers.data();
+    auto** const page_table = current_process->VMManager().page_table.pointers.data();
 
     Dynarmic::A64::UserConfig config;
 
@@ -139,7 +139,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
 
     // Memory
     config.page_table = reinterpret_cast<void**>(page_table);
-    config.page_table_address_space_bits = current_process->vm_manager.GetAddressSpaceWidth();
+    config.page_table_address_space_bits = current_process->VMManager().GetAddressSpaceWidth();
     config.silently_mirror_page_table = false;
 
     // Multi-process state
@@ -247,15 +247,19 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
     ctx.pstate = jit->GetPstate();
     ctx.vector_registers = jit->GetVectors();
     ctx.fpcr = jit->GetFpcr();
+    ctx.fpsr = jit->GetFpsr();
+    ctx.tpidr = cb->tpidr_el0;
 }
 
 void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
     jit->SetRegisters(ctx.cpu_registers);
     jit->SetSP(ctx.sp);
     jit->SetPC(ctx.pc);
-    jit->SetPstate(static_cast<u32>(ctx.pstate));
+    jit->SetPstate(ctx.pstate);
     jit->SetVectors(ctx.vector_registers);
-    jit->SetFpcr(static_cast<u32>(ctx.fpcr));
+    jit->SetFpcr(ctx.fpcr);
+    jit->SetFpsr(ctx.fpsr);  // Counterpart of the GetFpsr() capture in SaveContext.
+    SetTPIDR_EL0(ctx.tpidr); // Counterpart of the cb->tpidr_el0 capture in SaveContext.
 }
 
 void ARM_Dynarmic::PrepareReschedule() {
diff --git a/src/core/file_sys/romfs_factory.cpp b/src/core/file_sys/romfs_factory.cpp
index 3d1a3685e..d027a8d59 100644
--- a/src/core/file_sys/romfs_factory.cpp
+++ b/src/core/file_sys/romfs_factory.cpp
@@ -34,7 +34,7 @@ ResultVal<VirtualFile> RomFSFactory::OpenCurrentProcess() {
     if (!updatable)
         return MakeResult<VirtualFile>(file);
 
-    const PatchManager patch_manager(Core::CurrentProcess()->program_id);
+    const PatchManager patch_manager(Core::CurrentProcess()->GetTitleID());
     return MakeResult<VirtualFile>(patch_manager.PatchRomFS(file, ivfc_offset));
 }
 
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index 9b2c51bbd..47f2ab9e0 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -81,7 +81,7 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
     // According to switchbrew, if a save is of type SaveData and the title id field is 0, it should
     // be interpreted as the title id of the current process.
     if (type == SaveDataType::SaveData && title_id == 0)
-        title_id = Core::CurrentProcess()->program_id;
+        title_id = Core::CurrentProcess()->GetTitleID();
 
     std::string out;
 
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index d8c7b3492..5bc947010 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -250,7 +250,7 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
     } else if (id == PC_REGISTER) {
         thread->context.pc = val;
     } else if (id == PSTATE_REGISTER) {
-        thread->context.pstate = val;
+        thread->context.pstate = static_cast<u32>(val);
     } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) {
         thread->context.vector_registers[id - (PSTATE_REGISTER + 1)][0] = val;
     }
@@ -587,7 +587,7 @@ static void HandleQuery() {
                        strlen("Xfer:features:read:target.xml:")) == 0) {
         SendReply(target_xml);
     } else if (strncmp(query, "Offsets", strlen("Offsets")) == 0) {
-        const VAddr base_address = Core::CurrentProcess()->vm_manager.GetCodeRegionBaseAddress();
+        const VAddr base_address = Core::CurrentProcess()->VMManager().GetCodeRegionBaseAddress();
         std::string buffer = fmt::format("TextSeg={:0x}", base_address);
         SendReply(buffer.c_str());
     } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
@@ -909,7 +909,7 @@ static void ReadMemory() {
         SendReply("E01");
     }
 
-    const auto& vm_manager = Core::CurrentProcess()->vm_manager;
+    const auto& vm_manager = Core::CurrentProcess()->VMManager();
     if (addr < vm_manager.GetCodeRegionBaseAddress() ||
         addr >= vm_manager.GetMapRegionEndAddress()) {
         return SendReply("E00");
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index 8c2be2681..e5fa67ae8 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -31,6 +31,7 @@ enum {
     TooLarge = 119,
     InvalidEnumValue = 120,
     NoSuchEntry = 121,
+    AlreadyRegistered = 122,
     InvalidState = 125,
     ResourceLimitExceeded = 132,
 };
@@ -58,6 +59,7 @@ constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel,
 constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
 constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
 constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize);
+constexpr ResultCode ERR_ALREADY_REGISTERED(ErrorModule::Kernel, ErrCodes::AlreadyRegistered);
 constexpr ResultCode ERR_INVALID_STATE(ErrorModule::Kernel, ErrCodes::InvalidState);
 constexpr ResultCode ERR_INVALID_THREAD_PRIORITY(ErrorModule::Kernel,
                                                  ErrCodes::InvalidThreadPriority);
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index a8e3098ca..dc9fc8470 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -47,6 +47,7 @@ SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
 
 void Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     program_id = metadata.GetTitleID();
+    is_64bit_process = metadata.Is64BitProgram(); // Exposed to callers via Is64BitProcess().
     vm_manager.Reset(metadata.GetAddressSpaceType());
 }
 
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index adb03c228..590e0c73d 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -135,6 +135,16 @@ public:
         return HANDLE_TYPE;
     }
 
+    /// Gets a mutable reference to this process' memory manager (the vm_manager member).
+    Kernel::VMManager& VMManager() {
+        return vm_manager;
+    }
+
+    /// Gets a read-only reference to this process' memory manager (the vm_manager member).
+    const Kernel::VMManager& VMManager() const {
+        return vm_manager;
+    }
+
     /// Gets the current status of the process
     ProcessStatus GetStatus() const {
         return status;
@@ -145,6 +155,45 @@ public:
         return process_id;
     }
 
+    /// Gets the title ID corresponding to this process (program_id, set by LoadFromMetadata).
+    u64 GetTitleID() const {
+        return program_id;
+    }
+
+    /// Gets this process' resource limit descriptor; dereferences resource_limit unchecked.
+    ResourceLimit& GetResourceLimit() {
+        return *resource_limit;
+    }
+
+    /// Read-only overload of GetResourceLimit(); dereferences resource_limit unchecked.
+    const ResourceLimit& GetResourceLimit() const {
+        return *resource_limit;
+    }
+
+    /// Gets the default CPU ID that this process' threads are scheduled on (0 by default).
+    u8 GetDefaultProcessorID() const {
+        return ideal_processor;
+    }
+
+    /// Gets the bitmask of allowed CPUs for this process' threads; not yet parsed from the header.
+    u32 GetAllowedProcessorMask() const {
+        return allowed_processor_mask;
+    }
+
+    /// Gets the bitmask of allowed thread priorities (all bits set by default).
+    u32 GetAllowedThreadPriorityMask() const {
+        return allowed_thread_priority_mask;
+    }
+
+    u32 IsVirtualMemoryEnabled() const { // Returns the raw u32 flag value (0 by default).
+        return is_virtual_address_memory_enabled;
+    }
+
+    /// Whether this process is AArch64 (true) or AArch32 (false). Defaults to true.
+    bool Is64BitProcess() const {
+        return is_64bit_process;
+    }
+
     /**
      * Loads process-specifics configuration info with metadata provided
      * by an executable.
@@ -153,30 +202,6 @@ public:
      */
     void LoadFromMetadata(const FileSys::ProgramMetadata& metadata);
 
-    /// Title ID corresponding to the process
-    u64 program_id;
-
-    /// Resource limit descriptor for this process
-    SharedPtr<ResourceLimit> resource_limit;
-
-    /// The process may only call SVCs which have the corresponding bit set.
-    std::bitset<0x80> svc_access_mask;
-    /// Maximum size of the handle table for the process.
-    unsigned int handle_table_size = 0x200;
-    /// Special memory ranges mapped into this processes address space. This is used to give
-    /// processes access to specific I/O regions and device memory.
-    boost::container::static_vector<AddressMapping, 8> address_mappings;
-    ProcessFlags flags;
-    /// Kernel compatibility version for this process
-    u16 kernel_version = 0;
-    /// The default CPU for this process, threads are scheduled on this cpu by default.
-    u8 ideal_processor = 0;
-    /// Bitmask of allowed CPUs that this process' threads can run on. TODO(Subv): Actually parse
-    /// this value from the process header.
-    u32 allowed_processor_mask = THREADPROCESSORID_DEFAULT_MASK;
-    u32 allowed_thread_priority_mask = 0xFFFFFFFF;
-    u32 is_virtual_address_memory_enabled = 0;
-
     /**
      * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
      * to this process.
@@ -212,18 +237,43 @@ public:
 
     ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size);
 
-    VMManager vm_manager;
-
 private:
     explicit Process(KernelCore& kernel);
     ~Process() override;
 
+    /// Memory manager for this process.
+    Kernel::VMManager vm_manager;
+
     /// Current status of the process
     ProcessStatus status;
 
     /// The ID of this process
     u32 process_id = 0;
 
+    /// Title ID corresponding to the process
+    u64 program_id;
+
+    /// Resource limit descriptor for this process
+    SharedPtr<ResourceLimit> resource_limit;
+
+    /// The process may only call SVCs which have the corresponding bit set.
+    std::bitset<0x80> svc_access_mask;
+    /// Maximum size of the handle table for the process.
+    u32 handle_table_size = 0x200;
+    /// Special memory ranges mapped into this processes address space. This is used to give
+    /// processes access to specific I/O regions and device memory.
+    boost::container::static_vector<AddressMapping, 8> address_mappings;
+    ProcessFlags flags;
+    /// Kernel compatibility version for this process
+    u16 kernel_version = 0;
+    /// The default CPU for this process, threads are scheduled on this cpu by default.
+    u8 ideal_processor = 0;
+    /// Bitmask of allowed CPUs that this process' threads can run on. TODO(Subv): Actually parse
+    /// this value from the process header.
+    u32 allowed_processor_mask = THREADPROCESSORID_DEFAULT_MASK;
+    u32 allowed_thread_priority_mask = 0xFFFFFFFF;
+    u32 is_virtual_address_memory_enabled = 0;
+
     // Memory used to back the allocations in the regular heap. A single vector is used to cover
     // the entire virtual address space extents that bound the allocations, including any holes.
     // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
@@ -242,6 +292,11 @@ private:
     /// This vector will grow as more pages are allocated for new threads.
     std::vector<std::bitset<8>> tls_slots;
 
+    /// Whether or not this process is AArch64, or AArch32.
+    /// By default, we currently assume this is true, unless otherwise
+    /// specified by metadata provided to the process during loading.
+    bool is_64bit_process = true;
+
     std::string name;
 };
 
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 9faf903cf..1e82cfffb 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -88,7 +88,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
         if (previous_process != current_thread->owner_process) {
             Core::CurrentProcess() = current_thread->owner_process;
-            SetCurrentPageTable(&Core::CurrentProcess()->vm_manager.page_table);
+            SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
         }
 
         cpu_core.LoadContext(new_thread->context);
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 9b78c8cb5..d061e6155 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -35,11 +35,11 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, SharedPtr<Proce
 
         // Refresh the address mappings for the current process.
         if (Core::CurrentProcess() != nullptr) {
-            Core::CurrentProcess()->vm_manager.RefreshMemoryBlockMappings(
+            Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
                 shared_memory->backing_block.get());
         }
     } else {
-        auto& vm_manager = shared_memory->owner_process->vm_manager;
+        auto& vm_manager = shared_memory->owner_process->VMManager();
 
         // The memory is already available and mapped in the owner process.
         auto vma = vm_manager.FindVMA(address);
@@ -73,7 +73,7 @@ SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
     shared_memory->backing_block = std::move(heap_block);
     shared_memory->backing_block_offset = offset;
     shared_memory->base_address =
-        kernel.CurrentProcess()->vm_manager.GetHeapRegionBaseAddress() + offset;
+        kernel.CurrentProcess()->VMManager().GetHeapRegionBaseAddress() + offset;
 
     return shared_memory;
 }
@@ -107,7 +107,7 @@ ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermi
     VAddr target_address = address;
 
     // Map the memory block into the target process
-    auto result = target_process->vm_manager.MapMemoryBlock(
+    auto result = target_process->VMManager().MapMemoryBlock(
         target_address, backing_block, backing_block_offset, size, MemoryState::Shared);
     if (result.Failed()) {
         LOG_ERROR(
@@ -117,14 +117,14 @@ ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermi
         return result.Code();
     }
 
-    return target_process->vm_manager.ReprotectRange(target_address, size,
-                                                     ConvertPermissions(permissions));
+    return target_process->VMManager().ReprotectRange(target_address, size,
+                                                      ConvertPermissions(permissions));
 }
 
 ResultCode SharedMemory::Unmap(Process* target_process, VAddr address) {
     // TODO(Subv): Verify what happens if the application tries to unmap an address that is not
     // mapped to a SharedMemory.
-    return target_process->vm_manager.UnmapRange(address, size);
+    return target_process->VMManager().UnmapRange(address, size);
 }
 
 VMAPermission SharedMemory::ConvertPermissions(MemoryPermission permission) {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 44bbaf0c8..1cdaa740a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -51,7 +51,7 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
     }
 
     auto& process = *Core::CurrentProcess();
-    const VAddr heap_base = process.vm_manager.GetHeapRegionBaseAddress();
+    const VAddr heap_base = process.VMManager().GetHeapRegionBaseAddress();
     CASCADE_RESULT(*heap_addr,
                    process.HeapAllocate(heap_base, heap_size, VMAPermission::ReadWrite));
     return RESULT_SUCCESS;
@@ -327,14 +327,14 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
               info_sub_id, handle);
 
     const auto& current_process = Core::CurrentProcess();
-    const auto& vm_manager = current_process->vm_manager;
+    const auto& vm_manager = current_process->VMManager();
 
     switch (static_cast<GetInfoType>(info_id)) {
     case GetInfoType::AllowedCpuIdBitmask:
-        *result = current_process->allowed_processor_mask;
+        *result = current_process->GetAllowedProcessorMask();
         break;
     case GetInfoType::AllowedThreadPrioBitmask:
-        *result = current_process->allowed_thread_priority_mask;
+        *result = current_process->GetAllowedThreadPriorityMask();
         break;
     case GetInfoType::MapRegionBaseAddr:
         *result = vm_manager.GetMapRegionBaseAddress();
@@ -386,10 +386,10 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         *result = vm_manager.GetNewMapRegionSize();
         break;
     case GetInfoType::IsVirtualAddressMemoryEnabled:
-        *result = current_process->is_virtual_address_memory_enabled;
+        *result = current_process->IsVirtualMemoryEnabled();
         break;
     case GetInfoType::TitleId:
-        *result = current_process->program_id;
+        *result = current_process->GetTitleID();
         break;
     case GetInfoType::PrivilegedProcessId:
         LOG_WARNING(Kernel_SVC,
@@ -415,8 +415,36 @@ static ResultCode SetThreadActivity(Handle handle, u32 unknown) {
 }
 
 /// Gets the thread context
-static ResultCode GetThreadContext(Handle handle, VAddr addr) {
-    LOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}, addr=0x{:X}", handle, addr);
+static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
+    LOG_DEBUG(Kernel_SVC, "called, context=0x{:08X}, thread=0x{:X}", thread_context, handle);
+
+    auto& kernel = Core::System::GetInstance().Kernel();
+    const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE; // The handle does not resolve to a thread.
+    }
+
+    const auto& current_process = Core::CurrentProcess();
+    if (thread->owner_process != current_process) {
+        return ERR_INVALID_HANDLE; // The thread is owned by a different process.
+    }
+
+    if (thread == GetCurrentThread()) {
+        return ERR_ALREADY_REGISTERED; // The calling thread may not query itself.
+    }
+
+    Core::ARM_Interface::ThreadContext ctx = thread->context;
+    // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
+    ctx.pstate &= 0xFF0FFE20;
+
+    // If 64-bit, we can just write the context registers directly and we're good.
+    // However, if 32-bit, only x0-x14 and v0-v15 are kept; the remaining registers are zeroed.
+    if (!current_process->Is64BitProcess()) {
+        std::fill(ctx.cpu_registers.begin() + 15, ctx.cpu_registers.end(), 0);
+        std::fill(ctx.vector_registers.begin() + 16, ctx.vector_registers.end(), u128{});
+    }
+
+    Memory::WriteBlock(thread_context, &ctx, sizeof(ctx));
     return RESULT_SUCCESS;
 }
 
@@ -444,8 +472,8 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
 
     // Note: The kernel uses the current process's resource limit instead of
     // the one from the thread owner's resource limit.
-    SharedPtr<ResourceLimit>& resource_limit = Core::CurrentProcess()->resource_limit;
-    if (resource_limit->GetMaxResourceValue(ResourceType::Priority) > priority) {
+    const ResourceLimit& resource_limit = Core::CurrentProcess()->GetResourceLimit();
+    if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
         return ERR_NOT_AUTHORIZED;
     }
 
@@ -519,9 +547,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* /*page_i
     if (!process) {
         return ERR_INVALID_HANDLE;
     }
-    auto vma = process->vm_manager.FindVMA(addr);
+    auto vma = process->VMManager().FindVMA(addr);
     memory_info->attributes = 0;
-    if (vma == Core::CurrentProcess()->vm_manager.vma_map.end()) {
+    if (vma == process->VMManager().vma_map.end()) { // end() of the map FindVMA searched
         memory_info->base_address = 0;
         memory_info->permission = static_cast<u32>(VMAPermission::None);
         memory_info->size = 0;
@@ -568,14 +596,14 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
         return ERR_INVALID_THREAD_PRIORITY;
     }
 
-    SharedPtr<ResourceLimit>& resource_limit = Core::CurrentProcess()->resource_limit;
-    if (resource_limit->GetMaxResourceValue(ResourceType::Priority) > priority) {
+    const ResourceLimit& resource_limit = Core::CurrentProcess()->GetResourceLimit();
+    if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
         return ERR_NOT_AUTHORIZED;
     }
 
     if (processor_id == THREADPROCESSORID_DEFAULT) {
         // Set the target CPU to the one specified in the process' exheader.
-        processor_id = Core::CurrentProcess()->ideal_processor;
+        processor_id = Core::CurrentProcess()->GetDefaultProcessorID();
         ASSERT(processor_id != THREADPROCESSORID_DEFAULT);
     }
 
@@ -902,10 +930,10 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
     }
 
     if (core == static_cast<u32>(THREADPROCESSORID_DEFAULT)) {
-        ASSERT(thread->owner_process->ideal_processor !=
+        ASSERT(thread->owner_process->GetDefaultProcessorID() !=
                static_cast<u8>(THREADPROCESSORID_DEFAULT));
         // Set the target CPU to the one specified in the process' exheader.
-        core = thread->owner_process->ideal_processor;
+        core = thread->owner_process->GetDefaultProcessorID();
         mask = 1ull << core;
     }
 
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index fea9ba5ea..22712e64f 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -64,6 +64,11 @@ void SvcWrap() {
     FuncReturn(func(Param(0), (s32)Param(1)).raw);
 }
 
+template <ResultCode func(u64, u32)>
+void SvcWrap() { // Calls func(Param(0), u32(Param(1))) and returns its raw result code.
+    FuncReturn(func(Param(0), static_cast<u32>(Param(1))).raw);
+}
+
 template <ResultCode func(u64*, u64)>
 void SvcWrap() {
     u64 param_1 = 0;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 064ed908d..b5c16cfbb 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -259,10 +259,10 @@ void Thread::BoostPriority(u32 priority) {
 SharedPtr<Thread> SetupMainThread(KernelCore& kernel, VAddr entry_point, u32 priority,
                                   Process& owner_process) {
     // Setup page table so we can write to memory
-    SetCurrentPageTable(&owner_process.vm_manager.page_table);
+    SetCurrentPageTable(&owner_process.VMManager().page_table);
 
     // Initialize new "main" thread
-    const VAddr stack_top = owner_process.vm_manager.GetTLSIORegionEndAddress();
+    const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
     auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, THREADPROCESSORID_0,
                                      stack_top, &owner_process);
 
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 2212b2cdd..2f15ac2a6 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -51,7 +51,7 @@ enum class FatalType : u32 {
 };
 
 static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) {
-    const auto title_id = Core::CurrentProcess()->program_id;
+    const auto title_id = Core::CurrentProcess()->GetTitleID();
     std::string crash_report =
         fmt::format("Yuzu {}-{} crash report\n"
                     "Title ID:                        {:016x}\n"
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index 1069d103f..4b2f758a8 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -317,9 +317,9 @@ void PL_U::GetSharedMemoryAddressOffset(Kernel::HLERequestContext& ctx) {
 
 void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
     // Map backing memory for the font data
-    Core::CurrentProcess()->vm_manager.MapMemoryBlock(SHARED_FONT_MEM_VADDR, impl->shared_font, 0,
-                                                      SHARED_FONT_MEM_SIZE,
-                                                      Kernel::MemoryState::Shared);
+    Core::CurrentProcess()->VMManager().MapMemoryBlock(SHARED_FONT_MEM_VADDR, impl->shared_font, 0,
+                                                       SHARED_FONT_MEM_SIZE,
+                                                       Kernel::MemoryState::Shared);
 
     // Create shared font memory object
     auto& kernel = Core::System::GetInstance().Kernel();
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 2ee60f1ec..bbc02abcc 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -612,7 +612,7 @@ public:
             {3000, nullptr, "ListDisplayModes"},
             {3001, nullptr, "ListDisplayRgbRanges"},
             {3002, nullptr, "ListDisplayContentTypes"},
-            {3200, nullptr, "GetDisplayMode"},
+            {3200, &ISystemDisplayService::GetDisplayMode, "GetDisplayMode"},
             {3201, nullptr, "SetDisplayMode"},
             {3202, nullptr, "GetDisplayUnderscan"},
             {3203, nullptr, "SetDisplayUnderscan"},
@@ -663,6 +663,24 @@ private:
         LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id,
                     visibility);
     }
+
+    void GetDisplayMode(Kernel::HLERequestContext& ctx) {
+        // Pick the docked or undocked resolution according to the current setting.
+        const bool docked = Settings::values.use_docked_mode;
+        const auto width = docked ? Service::VI::DisplayResolution::DockedWidth
+                                  : Service::VI::DisplayResolution::UndockedWidth;
+        const auto height = docked ? Service::VI::DisplayResolution::DockedHeight
+                                   : Service::VI::DisplayResolution::UndockedHeight;
+
+        // Payload: width, height, 60.0f (presumably refresh rate; confirm), and a zero word.
+        IPC::ResponseBuilder rb{ctx, 6};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push(static_cast<u32>(width));
+        rb.Push(static_cast<u32>(height));
+        rb.PushRaw<float>(60.0f);
+        rb.Push<u32>(0);
+        LOG_DEBUG(Service_VI, "called");
+    }
 };
 
 class IManagerDisplayService final : public ServiceFramework<IManagerDisplayService> {
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 1b198cc5c..c1824b9c3 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -132,7 +132,7 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
     process.LoadFromMetadata(metadata);
 
     // Load NSO modules
-    const VAddr base_address = process.vm_manager.GetCodeRegionBaseAddress();
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     VAddr next_load_addr = base_address;
     for (const auto& module : {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3",
                                "subsdk4", "subsdk5", "subsdk6", "subsdk7", "sdk"}) {
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 5712a2a11..e67b49fc9 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -395,7 +395,7 @@ ResultStatus AppLoader_ELF::Load(Kernel::Process& process) {
     if (buffer.size() != file->GetSize())
         return ResultStatus::ErrorIncorrectELFFileSize;
 
-    const VAddr base_address = process.vm_manager.GetCodeRegionBaseAddress();
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     ElfReader elf_reader(&buffer[0]);
     SharedPtr<CodeSet> codeset = elf_reader.LoadInto(base_address);
     codeset->name = file->GetName();
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 8ad973c3a..c10f826a4 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -181,7 +181,7 @@ ResultStatus AppLoader_NRO::Load(Kernel::Process& process) {
     }
 
     // Load NRO
-    const VAddr base_address = process.vm_manager.GetCodeRegionBaseAddress();
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
 
     if (!LoadNro(file, base_address)) {
         return ResultStatus::ErrorLoadingNRO;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 6fe3e17a7..cbe2a3e53 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -159,7 +159,7 @@ ResultStatus AppLoader_NSO::Load(Kernel::Process& process) {
     }
 
     // Load module
-    const VAddr base_address = process.vm_manager.GetCodeRegionBaseAddress();
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     LoadModule(file, base_address);
     LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address);
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 6430daad4..014298ed6 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -119,7 +119,7 @@ void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPoin
 static u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) {
     u8* direct_pointer = nullptr;
 
-    auto& vm_manager = process.vm_manager;
+    auto& vm_manager = process.VMManager();
 
     auto it = vm_manager.FindVMA(vaddr);
     ASSERT(it != vm_manager.vma_map.end());
@@ -214,7 +214,7 @@ void Write(const VAddr vaddr, const T data) {
 }
 
 bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
-    auto& page_table = process.vm_manager.page_table;
+    const auto& page_table = process.VMManager().page_table;
 
     const u8* page_pointer = page_table.pointers[vaddr >> PAGE_BITS];
     if (page_pointer)
@@ -363,7 +363,7 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
         }
     };
 
-    const auto& vm_manager = Core::CurrentProcess()->vm_manager;
+    const auto& vm_manager = Core::CurrentProcess()->VMManager();
 
     CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
     CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
@@ -387,7 +387,7 @@ u64 Read64(const VAddr addr) {
 
 void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
                const std::size_t size) {
-    auto& page_table = process.vm_manager.page_table;
+    const auto& page_table = process.VMManager().page_table;
 
     std::size_t remaining_size = size;
     std::size_t page_index = src_addr >> PAGE_BITS;
@@ -452,7 +452,7 @@ void Write64(const VAddr addr, const u64 data) {
 
 void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
                 const std::size_t size) {
-    auto& page_table = process.vm_manager.page_table;
+    const auto& page_table = process.VMManager().page_table;
     std::size_t remaining_size = size;
     std::size_t page_index = dest_addr >> PAGE_BITS;
     std::size_t page_offset = dest_addr & PAGE_MASK;
@@ -498,7 +498,7 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t
 }
 
 void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
-    auto& page_table = process.vm_manager.page_table;
+    const auto& page_table = process.VMManager().page_table;
     std::size_t remaining_size = size;
     std::size_t page_index = dest_addr >> PAGE_BITS;
     std::size_t page_offset = dest_addr & PAGE_MASK;
@@ -540,7 +540,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
 
 void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
                const std::size_t size) {
-    auto& page_table = process.vm_manager.page_table;
+    const auto& page_table = process.VMManager().page_table;
     std::size_t remaining_size = size;
     std::size_t page_index = src_addr >> PAGE_BITS;
     std::size_t page_offset = src_addr & PAGE_MASK;
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index c17a122cd..c0a57e71f 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -16,7 +16,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
     : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
 
     Core::CurrentProcess() = Kernel::Process::Create(kernel, "");
-    page_table = &Core::CurrentProcess()->vm_manager.page_table;
+    page_table = &Core::CurrentProcess()->VMManager().page_table;
 
     std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
     page_table->special_regions.clear();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 1fcd13f04..14d82a7bc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -738,7 +738,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
         }
 
         texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
-        Surface surface = res_cache.GetTextureSurface(texture);
+        Surface surface = res_cache.GetTextureSurface(texture, entry);
         if (surface != nullptr) {
             state.texture_units[current_bindpoint].texture = surface->Texture().handle;
             state.texture_units[current_bindpoint].target = surface->Target();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 24a540258..ce967c4d6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -41,7 +41,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
 }
 
 /*static*/ SurfaceParams SurfaceParams::CreateForTexture(
-    const Tegra::Texture::FullTextureInfo& config) {
+    const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
     SurfaceParams params{};
     params.addr = TryGetCpuAddr(config.tic.Address());
     params.is_tiled = config.tic.IsTiled();
@@ -60,9 +60,23 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     case SurfaceTarget::Texture2D:
         params.depth = 1;
         break;
+    case SurfaceTarget::TextureCubemap:
+        params.depth = config.tic.Depth() * 6;
+        break;
     case SurfaceTarget::Texture3D:
+        params.depth = config.tic.Depth();
+        break;
     case SurfaceTarget::Texture2DArray:
         params.depth = config.tic.Depth();
+        if (!entry.IsArray()) {
+            // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of
+            // one, but sample the texture in the shader as if it were not an array texture. This
+            // probably is valid on hardware, but we still need to write a test to confirm this. In
+            // emulation, the workaround here is to continue to treat this as a Texture2D. An
+            // example game that does this is Super Mario Odyssey (in Cloud Kingdom).
+            ASSERT(params.depth == 1);
+            params.target = SurfaceTarget::Texture2D;
+        }
         break;
     default:
         LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
@@ -71,7 +85,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
         break;
     }
 
-    params.size_in_bytes = params.SizeInBytes();
+    params.size_in_bytes_total = params.SizeInBytesTotal();
+    params.size_in_bytes_2d = params.SizeInBytes2D();
+    params.max_mip_level = config.tic.max_mip_level + 1;
+    params.rt = {};
+
     return params;
 }
 
@@ -89,7 +107,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.unaligned_height = config.height;
     params.target = SurfaceTarget::Texture2D;
     params.depth = 1;
-    params.size_in_bytes = params.SizeInBytes();
+    params.size_in_bytes_total = params.SizeInBytesTotal();
+    params.size_in_bytes_2d = params.SizeInBytes2D();
+    params.max_mip_level = 0;
+
+    // Render target specific parameters, not used for caching
+    params.rt.index = static_cast<u32>(index);
+    params.rt.array_mode = config.array_mode;
+    params.rt.layer_stride = config.layer_stride;
+    params.rt.base_layer = config.base_layer;
+
     return params;
 }
 
@@ -108,7 +135,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.unaligned_height = zeta_height;
     params.target = SurfaceTarget::Texture2D;
     params.depth = 1;
-    params.size_in_bytes = params.SizeInBytes();
+    params.size_in_bytes_total = params.SizeInBytesTotal();
+    params.size_in_bytes_2d = params.SizeInBytes2D();
+    params.max_mip_level = 0;
+    params.rt = {};
+
     return params;
 }
 
@@ -400,9 +431,13 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
         // clang-format on
 };
 
-static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
-                         const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
-                         GLuint read_fb_handle, GLuint draw_fb_handle) {
+static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
+                        GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
+                        GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+
+    const auto& src_params{src_surface->GetSurfaceParams()};
+    const auto& dst_params{dst_surface->GetSurfaceParams()};
+
     OpenGLState prev_state{OpenGLState::GetCurState()};
     SCOPE_EXIT({ prev_state.Apply(); });
 
@@ -413,47 +448,203 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
 
     u32 buffers{};
 
-    if (type == SurfaceType::ColorTexture) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
-                               0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
+    if (src_params.type == SurfaceType::ColorTexture) {
+        switch (src_params.target) {
+        case SurfaceParams::SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceParams::SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture2DArray:
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                      src_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture3D:
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   SurfaceTargetToGL(src_params.target),
+                                   src_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                   SurfaceTargetToGL(src_params.target), 0, 0, 0);
+            break;
+        default:
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
+
+        switch (dst_params.target) {
+        case SurfaceParams::SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceParams::SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture2DArray:
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                      dst_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
 
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
-                               0);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
+        case SurfaceParams::SurfaceTarget::Texture3D:
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   SurfaceTargetToGL(dst_params.target),
+                                   dst_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                   SurfaceTargetToGL(dst_params.target), 0, 0, 0);
+            break;
+        default:
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
 
         buffers = GL_COLOR_BUFFER_BIT;
-    } else if (type == SurfaceType::Depth) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
+    } else if (src_params.type == SurfaceType::Depth) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               src_surface->Texture().handle, 0);
         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
 
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_surface->Texture().handle, 0);
         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
 
         buffers = GL_DEPTH_BUFFER_BIT;
-    } else if (type == SurfaceType::DepthStencil) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+    } else if (src_params.type == SurfaceType::DepthStencil) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                               src_tex, 0);
+                               src_surface->Texture().handle, 0);
 
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                               dst_tex, 0);
+                               dst_surface->Texture().handle, 0);
 
         buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
     }
 
-    glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
-                      dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
+    const auto& rect{src_params.GetRect()};
+    glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom,
+                      rect.right, rect.top, buffers,
                       buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
 
     return true;
 }
 
+static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                        GLuint copy_pbo_handle, GLenum src_attachment = 0,
+                        GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+    ASSERT_MSG(dst_attachment == 0, "Unimplemented");
+
+    const auto& src_params{src_surface->GetSurfaceParams()};
+    const auto& dst_params{dst_surface->GetSurfaceParams()};
+
+    auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
+    auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
+
+    std::size_t buffer_size =
+        std::max(src_params.size_in_bytes_total, dst_params.size_in_bytes_total);
+
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
+    glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
+    if (source_format.compressed) {
+        glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
+                                    static_cast<GLsizei>(src_params.size_in_bytes_total), nullptr);
+    } else {
+        glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
+                          source_format.type, static_cast<GLsizei>(src_params.size_in_bytes_total),
+                          nullptr);
+    }
+    // If the new texture is bigger than the previous one, we need to fill in the rest with data
+    // from the CPU.
+    if (src_params.size_in_bytes_total < dst_params.size_in_bytes_total) {
+        // Upload the rest of the memory.
+        if (dst_params.is_tiled) {
+            // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
+            // of the data in this case. Games like Super Mario Odyssey seem to hit this case
+            // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
+            // but it doesn't clear it beforehand, the texture is already full of zeros.
+            LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
+                              "reinterpretation but the texture is tiled.");
+        }
+        std::size_t remaining_size =
+            dst_params.size_in_bytes_total - src_params.size_in_bytes_total;
+        std::vector<u8> data(remaining_size);
+        Memory::ReadBlock(dst_params.addr + src_params.size_in_bytes_total, data.data(),
+                          data.size());
+        glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes_total, remaining_size,
+                        data.data());
+    }
+
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+    const GLsizei width{static_cast<GLsizei>(
+        std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
+    const GLsizei height{static_cast<GLsizei>(
+        std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};
+
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
+    if (dest_format.compressed) {
+        LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
+        UNREACHABLE();
+    } else {
+        switch (dst_params.target) {
+        case SurfaceParams::SurfaceTarget::Texture1D:
+            glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
+                                dest_format.type, nullptr);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture2D:
+            glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
+                                dest_format.format, dest_format.type, nullptr);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture3D:
+        case SurfaceParams::SurfaceTarget::Texture2DArray:
+            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
+                                static_cast<GLsizei>(dst_params.depth), dest_format.format,
+                                dest_format.type, nullptr);
+            break;
+        case SurfaceParams::SurfaceTarget::TextureCubemap:
+            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
+                                static_cast<GLint>(cubemap_face), width, height, 1,
+                                dest_format.format, dest_format.type, nullptr);
+            break;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                         static_cast<u32>(dst_params.target));
+            UNREACHABLE();
+        }
+        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+    }
+}
+
 CachedSurface::CachedSurface(const SurfaceParams& params)
     : params(params), gl_target(SurfaceTargetToGL(params.target)) {
     texture.Create();
@@ -481,6 +672,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
                            rect.GetWidth());
             break;
         case SurfaceParams::SurfaceTarget::Texture2D:
+        case SurfaceParams::SurfaceTarget::TextureCubemap:
             glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
                            rect.GetWidth(), rect.GetHeight());
             break;
@@ -585,29 +777,39 @@ void CachedSurface::LoadGLBuffer() {
 
     const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
     const u32 copy_size = params.width * params.height * bytes_per_pixel;
+    const std::size_t total_size = copy_size * params.depth;
 
     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
 
     if (params.is_tiled) {
+        gl_buffer.resize(total_size);
+
         // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
         // this for 3D textures, etc.
         switch (params.target) {
         case SurfaceParams::SurfaceTarget::Texture2D:
             // Pass impl. to the fallback code below
             break;
+        case SurfaceParams::SurfaceTarget::Texture2DArray:
+        case SurfaceParams::SurfaceTarget::TextureCubemap:
+            for (std::size_t index = 0; index < params.depth; ++index) {
+                const std::size_t offset{index * copy_size};
+                morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
+                    params.width, params.block_height, params.height, gl_buffer.data() + offset,
+                    copy_size, params.addr + offset);
+            }
+            break;
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
                          static_cast<u32>(params.target));
             UNREACHABLE();
         }
 
-        gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
         morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
             params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
             params.addr);
     } else {
-        const u8* const texture_src_data_end{texture_src_data +
-                                             (static_cast<std::size_t>(params.depth) * copy_size)};
+        const u8* const texture_src_data_end{texture_src_data + total_size};
         gl_buffer.assign(texture_src_data, texture_src_data_end);
     }
 
@@ -634,7 +836,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
     // Load data from memory to the surface
     const GLint x0 = static_cast<GLint>(rect.left);
     const GLint y0 = static_cast<GLint>(rect.bottom);
-    const std::size_t buffer_offset =
+    std::size_t buffer_offset =
         static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
                                  static_cast<std::size_t>(x0)) *
         GetGLBytesPerPixel(params.pixel_format);
@@ -663,15 +865,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
             glCompressedTexImage2D(
                 SurfaceTargetToGL(params.target), 0, tuple.internal_format,
                 static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
-                static_cast<GLsizei>(params.size_in_bytes), &gl_buffer[buffer_offset]);
+                static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
             break;
         case SurfaceParams::SurfaceTarget::Texture3D:
         case SurfaceParams::SurfaceTarget::Texture2DArray:
             glCompressedTexImage3D(
                 SurfaceTargetToGL(params.target), 0, tuple.internal_format,
                 static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
-                static_cast<GLsizei>(params.depth), 0, static_cast<GLsizei>(params.size_in_bytes),
-                &gl_buffer[buffer_offset]);
+                static_cast<GLsizei>(params.depth), 0,
+                static_cast<GLsizei>(params.size_in_bytes_total), &gl_buffer[buffer_offset]);
+            break;
+        case SurfaceParams::SurfaceTarget::TextureCubemap:
+            for (std::size_t face = 0; face < params.depth; ++face) {
+                glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
+                                       0, tuple.internal_format, static_cast<GLsizei>(params.width),
+                                       static_cast<GLsizei>(params.height), 0,
+                                       static_cast<GLsizei>(params.size_in_bytes_2d),
+                                       &gl_buffer[buffer_offset]);
+                buffer_offset += params.size_in_bytes_2d;
+            }
             break;
         default:
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -679,8 +891,8 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
             UNREACHABLE();
             glCompressedTexImage2D(
                 GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
-                static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
-                &gl_buffer[buffer_offset]);
+                static_cast<GLsizei>(params.height), 0,
+                static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
         }
     } else {
 
@@ -703,6 +915,15 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
                             static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
                             tuple.type, &gl_buffer[buffer_offset]);
             break;
+        case SurfaceParams::SurfaceTarget::TextureCubemap:
+            for (std::size_t face = 0; face < params.depth; ++face) {
+                glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0,
+                                y0, static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                                &gl_buffer[buffer_offset]);
+                buffer_offset += params.size_in_bytes_2d;
+            }
+            break;
         default:
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                          static_cast<u32>(params.target));
@@ -722,8 +943,9 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
     copy_pbo.Create();
 }
 
-Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
-    return GetSurface(SurfaceParams::CreateForTexture(config));
+Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
+                                                 const GLShader::SamplerEntry& entry) {
+    return GetSurface(SurfaceParams::CreateForTexture(config, entry));
 }
 
 Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
@@ -811,98 +1033,69 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
     return surface;
 }
 
-Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
+Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
                                                const SurfaceParams& new_params) {
     // Verify surface is compatible for blitting
-    const auto& params{surface->GetSurfaceParams()};
+    auto old_params{old_surface->GetSurfaceParams()};
 
     // Get a new surface with the new parameters, and blit the previous surface to it
     Surface new_surface{GetUncachedSurface(new_params)};
 
-    if (params.pixel_format == new_params.pixel_format ||
-        !Settings::values.use_accurate_framebuffers) {
-        // If the format is the same, just do a framebuffer blit. This is significantly faster than
-        // using PBOs. The is also likely less accurate, as textures will be converted rather than
-        // reinterpreted.
-
-        BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
-                     params.GetRect(), params.type, read_framebuffer.handle,
-                     draw_framebuffer.handle);
-    } else {
-        // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
-        // where pixels are reinterpreted as a new format (without conversion). This code path uses
-        // OpenGL PBOs and is quite slow.
-
-        auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
-        auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
-
-        std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
+    // If the format is the same, just do a framebuffer blit. This is significantly faster than
+    // using PBOs. This is also likely less accurate, as textures will be converted rather than
+    // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate
+    // surface copy, where pixels are reinterpreted as a new format (without conversion). This
+    // code path uses OpenGL PBOs and is quite slow.
+    const bool is_blit{old_params.pixel_format == new_params.pixel_format ||
+                       !Settings::values.use_accurate_framebuffers};
 
-        glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
-        glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
-        if (source_format.compressed) {
-            glGetCompressedTextureImage(surface->Texture().handle, 0,
-                                        static_cast<GLsizei>(params.SizeInBytes()), nullptr);
+    switch (new_params.target) {
+    case SurfaceParams::SurfaceTarget::Texture2D:
+        if (is_blit) {
+            BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
         } else {
-            glGetTextureImage(surface->Texture().handle, 0, source_format.format,
-                              source_format.type, static_cast<GLsizei>(params.SizeInBytes()),
-                              nullptr);
+            CopySurface(old_surface, new_surface, copy_pbo.handle);
         }
-        // If the new texture is bigger than the previous one, we need to fill in the rest with data
-        // from the CPU.
-        if (params.SizeInBytes() < new_params.SizeInBytes()) {
-            // Upload the rest of the memory.
-            if (new_params.is_tiled) {
-                // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
-                // of the data in this case. Games like Super Mario Odyssey seem to hit this case
-                // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
-                // but it doesn't clear it beforehand, the texture is already full of zeros.
-                LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
-                                  "reinterpretation but the texture is tiled.");
-            }
-            std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
-            std::vector<u8> data(remaining_size);
-            Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
-            glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
-                            data.data());
+        break;
+    case SurfaceParams::SurfaceTarget::TextureCubemap: {
+        if (old_params.rt.array_mode != 1) {
+            // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this
+            // yet (array rendering used as a cubemap texture).
+            LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode);
+            UNREACHABLE();
+            return new_surface;
         }
 
-        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
-
-        const auto& dest_rect{new_params.GetRect()};
-
-        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle);
-        if (dest_format.compressed) {
-            LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
-            UNREACHABLE();
-        } else {
-            switch (new_params.target) {
-            case SurfaceParams::SurfaceTarget::Texture1D:
-                glTextureSubImage1D(new_surface->Texture().handle, 0, 0,
-                                    static_cast<GLsizei>(dest_rect.GetWidth()), dest_format.format,
-                                    dest_format.type, nullptr);
-                break;
-            case SurfaceParams::SurfaceTarget::Texture2D:
-                glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
-                                    static_cast<GLsizei>(dest_rect.GetWidth()),
-                                    static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
-                                    dest_format.type, nullptr);
-                break;
-            case SurfaceParams::SurfaceTarget::Texture3D:
-            case SurfaceParams::SurfaceTarget::Texture2DArray:
-                glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0,
-                                    static_cast<GLsizei>(dest_rect.GetWidth()),
-                                    static_cast<GLsizei>(dest_rect.GetHeight()),
-                                    static_cast<GLsizei>(new_params.depth), dest_format.format,
-                                    dest_format.type, nullptr);
-                break;
-            default:
-                LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
-                             static_cast<u32>(params.target));
-                UNREACHABLE();
+        // This seems to be used for render-to-cubemap texture
+        ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected");
+        ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected");
+        ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented");
+
+        // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels.
+        // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild.
+        const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)};
+
+        for (std::size_t index = 0; index < new_params.depth; ++index) {
+            Surface face_surface{TryGetReservedSurface(old_params)};
+            ASSERT_MSG(face_surface, "Unexpected");
+
+            if (is_blit) {
+                BlitSurface(face_surface, new_surface, read_framebuffer.handle,
+                            draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index,
+                            new_params.rt.index, index);
+            } else {
+                CopySurface(face_surface, new_surface, copy_pbo.handle,
+                            face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index);
             }
+
+            old_params.addr += byte_stride;
         }
-        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+        break;
+    }
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                     static_cast<u32>(new_params.target));
+        UNREACHABLE();
     }
 
     return new_surface;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 80c5f324b..49025a3fe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -9,12 +9,14 @@
 #include <memory>
 #include <vector>
 
+#include "common/alignment.h"
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/textures/texture.h"
 
 namespace OpenGL {
@@ -126,6 +128,8 @@ struct SurfaceParams {
         case Tegra::Texture::TextureType::Texture2D:
         case Tegra::Texture::TextureType::Texture2DNoMipmap:
             return SurfaceTarget::Texture2D;
+        case Tegra::Texture::TextureType::TextureCubemap:
+            return SurfaceTarget::TextureCubemap;
         case Tegra::Texture::TextureType::Texture1DArray:
             return SurfaceTarget::Texture1DArray;
         case Tegra::Texture::TextureType::Texture2DArray:
@@ -689,17 +693,23 @@ struct SurfaceParams {
     /// Returns the rectangle corresponding to this surface
     MathUtil::Rectangle<u32> GetRect() const;
 
-    /// Returns the size of this surface in bytes, adjusted for compression
-    std::size_t SizeInBytes() const {
+    /// Returns the size of this surface as a 2D texture in bytes, adjusted for compression
+    std::size_t SizeInBytes2D() const {
         const u32 compression_factor{GetCompressionFactor(pixel_format)};
         ASSERT(width % compression_factor == 0);
         ASSERT(height % compression_factor == 0);
         return (width / compression_factor) * (height / compression_factor) *
-               GetFormatBpp(pixel_format) * depth / CHAR_BIT;
+               GetFormatBpp(pixel_format) / CHAR_BIT;
+    }
+
+    /// Returns the total size of this surface in bytes, adjusted for compression
+    std::size_t SizeInBytesTotal() const {
+        return SizeInBytes2D() * depth;
     }
 
     /// Creates SurfaceParams from a texture configuration
-    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
+    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
+                                          const GLShader::SamplerEntry& entry);
 
     /// Creates SurfaceParams from a framebuffer configuration
     static SurfaceParams CreateForFramebuffer(std::size_t index);
@@ -711,8 +721,9 @@ struct SurfaceParams {
 
     /// Checks if surfaces are compatible for caching
     bool IsCompatibleSurface(const SurfaceParams& other) const {
-        return std::tie(pixel_format, type, width, height) ==
-               std::tie(other.pixel_format, other.type, other.width, other.height);
+        return std::tie(pixel_format, type, width, height, target, depth) ==
+               std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
+                        other.depth);
     }
 
     VAddr addr;
@@ -725,8 +736,18 @@ struct SurfaceParams {
     u32 height;
     u32 depth;
     u32 unaligned_height;
-    std::size_t size_in_bytes;
+    std::size_t size_in_bytes_total;
+    std::size_t size_in_bytes_2d;
     SurfaceTarget target;
+    u32 max_mip_level;
+
+    // Render-target-specific parameters; cleared in SurfaceReserveKey, so not used in caching
+    struct {
+        u32 index;
+        u32 array_mode;
+        u32 layer_stride;
+        u32 base_layer;
+    } rt;
 };
 
 }; // namespace OpenGL
@@ -736,6 +757,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
     static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
         SurfaceReserveKey res;
         res.state = params;
+        res.state.rt = {}; // Ignore rt config in caching
         return res;
     }
 };
@@ -759,7 +781,7 @@ public:
     }
 
     std::size_t GetSizeInBytes() const {
-        return params.size_in_bytes;
+        return params.size_in_bytes_total;
     }
 
     const OGLTexture& Texture() const {
@@ -800,7 +822,8 @@ public:
     RasterizerCacheOpenGL();
 
     /// Get a surface based on the texture configuration
-    Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
+    Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
+                              const GLShader::SamplerEntry& entry);
 
     /// Get the depth surface based on the framebuffer configuration
     Surface GetDepthBufferSurface(bool preserve_contents);
@@ -822,7 +845,7 @@ private:
     Surface GetUncachedSurface(const SurfaceParams& params);
 
     /// Recreates a surface with new parameters
-    Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
+    Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);
 
     /// Reserves a unique surface that can be reused later
     void ReserveSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index b3e95187e..320babdb1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2000,6 +2000,14 @@ private:
                     }
                     break;
                 }
+                case Tegra::Shader::TextureType::TextureCube: {
+                    ASSERT_MSG(!is_array, "Unimplemented");
+                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    std::string z = regs.GetRegisterAsFloat(instr.gpr20);
+                    coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+                    break;
+                }
                 default:
                     LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
                                  static_cast<u32>(texture_type));
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c2fb824b2..14aea4838 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -165,6 +165,8 @@ struct TICEntry {
 
         // High 16 bits of the pitch value
         BitField<0, 16, u32> pitch_high;
+
+        BitField<28, 4, u32> max_mip_level;
     };
     union {
         BitField<0, 16, u32> width_minus_1;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index d74489935..27015d02c 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -622,9 +622,9 @@ void GMainWindow::BootGame(const QString& filename) {
     std::string title_name;
     const auto res = Core::System::GetInstance().GetGameName(title_name);
     if (res != Loader::ResultStatus::Success) {
-        const u64 program_id = Core::System::GetInstance().CurrentProcess()->program_id;
+        const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
 
-        const auto [nacp, icon_file] = FileSys::PatchManager(program_id).GetControlMetadata();
+        const auto [nacp, icon_file] = FileSys::PatchManager(title_id).GetControlMetadata();
         if (nacp != nullptr)
             title_name = nacp->GetApplicationName();
 
@@ -1055,11 +1055,21 @@ void GMainWindow::OnMenuInstallToNAND() {
             return;
         }
 
-        if (index >= 5)
-            index += 0x7B;
+        // From "Game" (index 5) onward, skip the TitleType gap after FirmwarePackageB.
+        if (index >= 5) {
+            index += static_cast<size_t>(FileSys::TitleType::Application) -
+                     static_cast<size_t>(FileSys::TitleType::FirmwarePackageB);
+        }
+
+        FileSys::InstallResult res;
+        if (index >= static_cast<size_t>(FileSys::TitleType::Application)) {
+            res = Service::FileSystem::GetUserNANDContents()->InstallEntry(
+                nca, static_cast<FileSys::TitleType>(index), false, qt_raw_copy);
+        } else {
+            res = Service::FileSystem::GetSystemNANDContents()->InstallEntry(
+                nca, static_cast<FileSys::TitleType>(index), false, qt_raw_copy);
+        }
 
-        const auto res = Service::FileSystem::GetUserNANDContents()->InstallEntry(
-            nca, static_cast<FileSys::TitleType>(index), false, qt_raw_copy);
         if (res == FileSys::InstallResult::Success) {
             success();
         } else if (res == FileSys::InstallResult::ErrorAlreadyExists) {