Diffstat (limited to 'src')
63 files changed, 871 insertions, 545 deletions
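For reference, a minimal usage sketch of the Common::LeastRecentlyUsedCache template that this change introduces in src/common/lru_cache.h (see the new file below). The BufferLRUTraits struct, the LruUsageSketch function, and the object/tick values are illustrative placeholders, not part of the change:

#include "common/lru_cache.h"

// Illustrative traits: the cache is parameterized on the cached object type and
// on the tick type used to order entries from least- to most-recently used.
struct BufferLRUTraits {
    using ObjectType = u32; // e.g. a buffer id
    using TickType = u64;   // e.g. a frame counter
};

void LruUsageSketch() {
    Common::LeastRecentlyUsedCache<BufferLRUTraits> lru;
    const size_t id = lru.Insert(42u, 1); // start tracking object 42, last used at tick 1
    lru.Touch(id, 5);                     // mark it as used again at tick 5
    // Walk entries whose tick does not exceed the given value, oldest first.
    lru.ForEachItemBelow(10, [](u32 obj) {
        // evict or destroy obj here
    });
    lru.Free(id); // detach the entry and return its slot to the free list
}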
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 949384fd3..e40d117d6 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -18,6 +18,7 @@ #include "common/fs/fs_paths.h" #include "common/fs/path_util.h" #include "common/literals.h" +#include "common/thread.h" #include "common/logging/backend.h" #include "common/logging/log.h" diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index f055f0e11..42744c994 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -111,6 +111,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Service, NCM) \ SUB(Service, NFC) \ SUB(Service, NFP) \ + SUB(Service, NGCT) \ SUB(Service, NIFM) \ SUB(Service, NIM) \ SUB(Service, NPNS) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 7ad0334fc..ddf9d27ca 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -81,6 +81,7 @@ enum class Class : u8 { Service_NCM, ///< The NCM service Service_NFC, ///< The NFC (Near-field communication) service Service_NFP, ///< The NFP service + Service_NGCT, ///< The NGCT (No Good Content for Terra) service Service_NIFM, ///< The NIFM (Network interface) service Service_NIM, ///< The NIM service Service_NPNS, ///< The NPNS service diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 000000000..365488ba5 --- /dev/null +++ b/src/common/lru_cache.h @@ -0,0 +1,140 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2+ or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <deque> +#include <memory> +#include <type_traits> + +#include "common/common_types.h" + +namespace Common { + +template <class Traits> +class LeastRecentlyUsedCache { + using ObjectType = typename Traits::ObjectType; + using TickType = typename Traits::TickType; + + struct Item { + ObjectType obj; + TickType tick; + Item* next{}; + Item* prev{}; + }; + +public: + LeastRecentlyUsedCache() : first_item{}, last_item{} {} + ~LeastRecentlyUsedCache() = default; + + size_t Insert(ObjectType obj, TickType tick) { + const auto new_id = Build(); + auto& item = item_pool[new_id]; + item.obj = obj; + item.tick = tick; + Attach(item); + return new_id; + } + + void Touch(size_t id, TickType tick) { + auto& item = item_pool[id]; + if (item.tick >= tick) { + return; + } + item.tick = tick; + if (&item == last_item) { + return; + } + Detach(item); + Attach(item); + } + + void Free(size_t id) { + auto& item = item_pool[id]; + Detach(item); + item.prev = nullptr; + item.next = nullptr; + free_items.push_back(id); + } + + template <typename Func> + void ForEachItemBelow(TickType tick, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v<std::invoke_result<Func, ObjectType>, bool>; + Item* iterator = first_item; + while (iterator) { + if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) { + return; + } + Item* next = iterator->next; + if constexpr (RETURNS_BOOL) { + if (func(iterator->obj)) { + return; + } + } else { + func(iterator->obj); + } + iterator = next; + } + } + +private: + size_t Build() { + if (free_items.empty()) { + const size_t item_id = item_pool.size(); + auto& item = item_pool.emplace_back(); + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + const size_t item_id = free_items.front(); + free_items.pop_front(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return 
item_id; + } + + void Attach(Item& item) { + if (!first_item) { + first_item = &item; + } + if (!last_item) { + last_item = &item; + } else { + item.prev = last_item; + last_item->next = &item; + item.next = nullptr; + last_item = &item; + } + } + + void Detach(Item& item) { + if (item.prev) { + item.prev->next = item.next; + } + if (item.next) { + item.next->prev = item.prev; + } + if (&item == first_item) { + first_item = item.next; + if (first_item) { + first_item->prev = nullptr; + } + } + if (&item == last_item) { + last_item = item.prev; + if (last_item) { + last_item->next = nullptr; + } + } + } + + std::deque<Item> item_pool; + std::deque<size_t> free_items; + Item* first_item{}; + Item* last_item{}; +}; + +} // namespace Common diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 996315999..fd3b639cd 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -59,7 +59,6 @@ void LogSettings() { log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); - log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); log_setting("Audio_OutputEngine", values.sink_id.GetValue()); log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -143,7 +142,6 @@ void RestoreGlobalState(bool is_powered_on) { values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); - values.use_caches_gc.SetGlobal(true); values.bg_red.SetGlobal(true); values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 20769d310..ec4d381e8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -475,7 +475,6 @@ struct Values { ShaderBackend::SPIRV, "shader_backend"}; Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; - Setting<bool> use_caches_gc{false, "use_caches_gc"}; Setting<u8> bg_red{0, "bg_red"}; Setting<u8> bg_green{0, "bg_green"}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f5cf5c16a..87d47e2e5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -452,6 +452,8 @@ add_library(core STATIC hle/service/nfp/nfp.h hle/service/nfp/nfp_user.cpp hle/service/nfp/nfp_user.h + hle/service/ngct/ngct.cpp + hle/service/ngct/ngct.h hle/service/nifm/nifm.cpp hle/service/nifm/nifm.h hle/service/nim/nim.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 5893a86bf..ba4629993 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -507,6 +507,12 @@ const ARM_Interface& System::CurrentArmInterface() const { return impl->kernel.CurrentPhysicalCore().ArmInterface(); } +std::size_t System::CurrentCoreIndex() const { + std::size_t core = impl->kernel.GetCurrentHostThreadID(); + ASSERT(core < Core::Hardware::NUM_CPU_CORES); + return core; +} + Kernel::PhysicalCore& System::CurrentPhysicalCore() { return impl->kernel.CurrentPhysicalCore(); } diff --git a/src/core/core.h b/src/core/core.h index f9116ebb6..715ab88e7 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -205,6 +205,9 @@ public: /// Gets an ARM interface to the CPU core that is currently running [[nodiscard]] const ARM_Interface& CurrentArmInterface() const; + /// Gets 
the index of the currently running CPU core + [[nodiscard]] std::size_t CurrentCoreIndex() const; + /// Gets the physical core for the CPU core that is currently running [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore(); diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index de2e5563e..77efcabf0 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -118,18 +118,17 @@ void CpuManager::MultiCoreRunGuestLoop() { physical_core = &kernel.CurrentPhysicalCore(); } system.ExitDynarmicProfile(); - { - Kernel::KScopedDisableDispatch dd(kernel); - physical_core->ArmInterface().ClearExclusiveState(); - } + physical_core->ArmInterface().ClearExclusiveState(); + kernel.CurrentScheduler()->RescheduleCurrentCore(); } } void CpuManager::MultiCoreRunIdleThread() { auto& kernel = system.Kernel(); while (true) { - Kernel::KScopedDisableDispatch dd(kernel); - kernel.CurrentPhysicalCore().Idle(); + auto& physical_core = kernel.CurrentPhysicalCore(); + physical_core.Idle(); + kernel.CurrentScheduler()->RescheduleCurrentCore(); } } @@ -137,12 +136,12 @@ void CpuManager::MultiCoreRunSuspendThread() { auto& kernel = system.Kernel(); kernel.CurrentScheduler()->OnThreadStart(); while (true) { - auto core = kernel.CurrentPhysicalCoreIndex(); + auto core = kernel.GetCurrentHostThreadID(); auto& scheduler = *kernel.CurrentScheduler(); Kernel::KThread* current_thread = scheduler.GetCurrentThread(); Common::Fiber::YieldTo(current_thread->GetHostContext(), *core_data[core].host_context); ASSERT(scheduler.ContextSwitchPending()); - ASSERT(core == kernel.CurrentPhysicalCoreIndex()); + ASSERT(core == kernel.GetCurrentHostThreadID()); scheduler.RescheduleCurrentCore(); } } @@ -348,11 +347,15 @@ void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) { sc_sync_first_use = false; } - // Emulation was stopped - if (stop_token.stop_requested()) { + // Abort if emulation was killed before the session really starts + if (!system.IsPoweredOn()) { return; } + if (stop_token.stop_requested()) { + break; + } + auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); data.is_running = true; Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext()); diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 6771ef621..1b429bc1e 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -28,7 +28,7 @@ bool ReadFromUser(Core::System& system, s32* out, VAddr address) { bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 value) { auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + const auto current_core = system.CurrentCoreIndex(); // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable. // TODO(bunnei): We should call CanAccessAtomic(..) here. @@ -58,7 +58,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 new_value) { auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + const auto current_core = system.CurrentCoreIndex(); // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable. // TODO(bunnei): We should call CanAccessAtomic(..) here. 
diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h index 165b76747..e4fcdbc67 100644 --- a/src/core/hle/kernel/k_auto_object.h +++ b/src/core/hle/kernel/k_auto_object.h @@ -170,10 +170,6 @@ public: } } - const std::string& GetName() const { - return name; - } - private: void RegisterWithKernel(); void UnregisterWithKernel(); diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index 4174f35fd..ef14ad1d2 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -35,7 +35,7 @@ bool WriteToUser(Core::System& system, VAddr address, const u32* p) { bool UpdateLockAtomic(Core::System& system, u32* out, VAddr address, u32 if_zero, u32 new_orr_mask) { auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + const auto current_core = system.CurrentCoreIndex(); // Load the value from the address. const auto expected = monitor.ExclusiveRead32(current_core, address); diff --git a/src/core/hle/kernel/k_handle_table.cpp b/src/core/hle/kernel/k_handle_table.cpp index d720c2dda..6a420d5b0 100644 --- a/src/core/hle/kernel/k_handle_table.cpp +++ b/src/core/hle/kernel/k_handle_table.cpp @@ -13,7 +13,6 @@ ResultCode KHandleTable::Finalize() { // Get the table and clear our record of it. u16 saved_table_size = 0; { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); std::swap(m_table_size, saved_table_size); @@ -44,7 +43,6 @@ bool KHandleTable::Remove(Handle handle) { // Find the object and free the entry. KAutoObject* obj = nullptr; { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); if (this->IsValidHandle(handle)) { @@ -63,7 +61,6 @@ bool KHandleTable::Remove(Handle handle) { } ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Never exceed our capacity. @@ -86,7 +83,6 @@ ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) { } ResultCode KHandleTable::Reserve(Handle* out_handle) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Never exceed our capacity. @@ -97,7 +93,6 @@ ResultCode KHandleTable::Reserve(Handle* out_handle) { } void KHandleTable::Unreserve(Handle handle) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Unpack the handle. @@ -116,7 +111,6 @@ void KHandleTable::Unreserve(Handle handle) { } void KHandleTable::Register(Handle handle, KAutoObject* obj, u16 type) { - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); // Unpack the handle. diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h index 75dcec7df..2ff6aa160 100644 --- a/src/core/hle/kernel/k_handle_table.h +++ b/src/core/hle/kernel/k_handle_table.h @@ -69,7 +69,6 @@ public: template <typename T = KAutoObject> KScopedAutoObject<T> GetObjectWithoutPseudoHandle(Handle handle) const { // Lock and look up in table. - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); if constexpr (std::is_same_v<T, KAutoObject>) { @@ -124,7 +123,6 @@ public: size_t num_opened; { // Lock the table. - KScopedDisableDispatch dd(kernel); KScopedSpinLock lk(m_lock); for (num_opened = 0; num_opened < num_handles; num_opened++) { // Get the current handle. 
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 3d7e6707e..8ead1a769 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -59,7 +59,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority thread->GetContext64().cpu_registers[0] = 0; thread->GetContext32().cpu_registers[1] = thread_handle; thread->GetContext64().cpu_registers[1] = thread_handle; - thread->DisableDispatch(); auto& kernel = system.Kernel(); // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index 6ddbae52c..6a7d80d03 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -376,18 +376,20 @@ void KScheduler::ClearSchedulerUpdateNeeded(KernelCore& kernel) { } void KScheduler::DisableScheduling(KernelCore& kernel) { - ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 0); - GetCurrentThreadPointer(kernel)->DisableDispatch(); + if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { + ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 0); + scheduler->GetCurrentThread()->DisableDispatch(); + } } void KScheduler::EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling) { - ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 1); - - if (GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() > 1) { - GetCurrentThreadPointer(kernel)->EnableDispatch(); - } else { - RescheduleCores(kernel, cores_needing_scheduling); + if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { + ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1); + if (scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1) { + scheduler->GetCurrentThread()->EnableDispatch(); + } } + RescheduleCores(kernel, cores_needing_scheduling); } u64 KScheduler::UpdateHighestPriorityThreads(KernelCore& kernel) { @@ -615,17 +617,13 @@ KScheduler::KScheduler(Core::System& system_, s32 core_id_) : system{system_}, c state.highest_priority_thread = nullptr; } -void KScheduler::Finalize() { +KScheduler::~KScheduler() { if (idle_thread) { idle_thread->Close(); idle_thread = nullptr; } } -KScheduler::~KScheduler() { - ASSERT(!idle_thread); -} - KThread* KScheduler::GetCurrentThread() const { if (auto result = current_thread.load(); result) { return result; @@ -644,12 +642,10 @@ void KScheduler::RescheduleCurrentCore() { if (phys_core.IsInterrupted()) { phys_core.ClearInterrupt(); } - guard.Lock(); if (state.needs_scheduling.load()) { Schedule(); } else { - GetCurrentThread()->EnableDispatch(); guard.Unlock(); } } @@ -659,33 +655,26 @@ void KScheduler::OnThreadStart() { } void KScheduler::Unload(KThread* thread) { - ASSERT(thread); - LOG_TRACE(Kernel, "core {}, unload thread {}", core_id, thread ? thread->GetName() : "nullptr"); - if (thread->IsCallingSvc()) { - thread->ClearIsCallingSvc(); - } - - auto& physical_core = system.Kernel().PhysicalCore(core_id); - if (!physical_core.IsInitialized()) { - return; - } - - Core::ARM_Interface& cpu_core = physical_core.ArmInterface(); - cpu_core.SaveContext(thread->GetContext32()); - cpu_core.SaveContext(thread->GetContext64()); - // Save the TPIDR_EL0 system register in case it was modified. 
- thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); - cpu_core.ClearExclusiveState(); - - if (!thread->IsTerminationRequested() && thread->GetActiveCore() == core_id) { - prev_thread = thread; - } else { - prev_thread = nullptr; + if (thread) { + if (thread->IsCallingSvc()) { + thread->ClearIsCallingSvc(); + } + if (!thread->IsTerminationRequested()) { + prev_thread = thread; + + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); + cpu_core.SaveContext(thread->GetContext32()); + cpu_core.SaveContext(thread->GetContext64()); + // Save the TPIDR_EL0 system register in case it was modified. + thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); + } else { + prev_thread = nullptr; + } + thread->context_guard.Unlock(); } - - thread->context_guard.Unlock(); } void KScheduler::Reload(KThread* thread) { @@ -694,6 +683,11 @@ void KScheduler::Reload(KThread* thread) { if (thread) { ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable."); + auto* const thread_owner_process = thread->GetOwnerProcess(); + if (thread_owner_process != nullptr) { + system.Kernel().MakeCurrentProcess(thread_owner_process); + } + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); cpu_core.LoadContext(thread->GetContext32()); cpu_core.LoadContext(thread->GetContext64()); @@ -711,7 +705,7 @@ void KScheduler::SwitchContextStep2() { } void KScheduler::ScheduleImpl() { - KThread* previous_thread = GetCurrentThread(); + KThread* previous_thread = current_thread.load(); KThread* next_thread = state.highest_priority_thread; state.needs_scheduling = false; @@ -723,15 +717,10 @@ void KScheduler::ScheduleImpl() { // If we're not actually switching thread, there's nothing to do. if (next_thread == current_thread.load()) { - previous_thread->EnableDispatch(); guard.Unlock(); return; } - if (next_thread->GetCurrentCore() != core_id) { - next_thread->SetCurrentCore(core_id); - } - current_thread.store(next_thread); KProcess* const previous_process = system.Kernel().CurrentProcess(); @@ -742,7 +731,11 @@ void KScheduler::ScheduleImpl() { Unload(previous_thread); std::shared_ptr<Common::Fiber>* old_context; - old_context = &previous_thread->GetHostContext(); + if (previous_thread != nullptr) { + old_context = &previous_thread->GetHostContext(); + } else { + old_context = &idle_thread->GetHostContext(); + } guard.Unlock(); Common::Fiber::YieldTo(*old_context, *switch_fiber); diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h index 516e0cdba..12cfae919 100644 --- a/src/core/hle/kernel/k_scheduler.h +++ b/src/core/hle/kernel/k_scheduler.h @@ -33,8 +33,6 @@ public: explicit KScheduler(Core::System& system_, s32 core_id_); ~KScheduler(); - void Finalize(); - /// Reschedules to the next available thread (call after current thread is suspended) void RescheduleCurrentCore(); diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 0f6808ade..9f1d3156b 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -14,7 +14,6 @@ #include "common/fiber.h" #include "common/logging/log.h" #include "common/scope_exit.h" -#include "common/settings.h" #include "common/thread_queue_list.h" #include "core/core.h" #include "core/cpu_manager.h" @@ -189,7 +188,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s // Setup the stack parameters. 
StackParameters& sp = GetStackParameters(); sp.cur_thread = this; - sp.disable_count = 0; + sp.disable_count = 1; SetInExceptionHandler(); // Set thread ID. @@ -216,10 +215,9 @@ ResultCode KThread::InitializeThread(KThread* thread, KThreadFunction func, uint // Initialize the thread. R_TRY(thread->Initialize(func, arg, user_stack_top, prio, core, owner, type)); - // Initialize emulation parameters. + // Initialize host context. thread->host_context = std::make_shared<Common::Fiber>(std::move(init_func), init_func_parameter); - thread->is_single_core = !Settings::values.use_multi_core.GetValue(); return ResultSuccess; } @@ -972,9 +970,6 @@ ResultCode KThread::Run() { // Set our state and finish. SetState(ThreadState::Runnable); - - DisableDispatch(); - return ResultSuccess; } } @@ -1059,16 +1054,4 @@ s32 GetCurrentCoreId(KernelCore& kernel) { return GetCurrentThread(kernel).GetCurrentCore(); } -KScopedDisableDispatch::~KScopedDisableDispatch() { - if (GetCurrentThread(kernel).GetDisableDispatchCount() <= 1) { - auto scheduler = kernel.CurrentScheduler(); - - if (scheduler) { - scheduler->RescheduleCurrentCore(); - } - } else { - GetCurrentThread(kernel).EnableDispatch(); - } -} - } // namespace Kernel diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index e4c4c877d..c77f44ad4 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -450,39 +450,16 @@ public: sleeping_queue = q; } - [[nodiscard]] bool IsKernelThread() const { - return GetActiveCore() == 3; - } - - [[nodiscard]] bool IsDispatchTrackingDisabled() const { - return is_single_core || IsKernelThread(); - } - [[nodiscard]] s32 GetDisableDispatchCount() const { - if (IsDispatchTrackingDisabled()) { - // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. - return 1; - } - return this->GetStackParameters().disable_count; } void DisableDispatch() { - if (IsDispatchTrackingDisabled()) { - // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. - return; - } - ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() >= 0); this->GetStackParameters().disable_count++; } void EnableDispatch() { - if (IsDispatchTrackingDisabled()) { - // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. 
- return; - } - ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() > 0); this->GetStackParameters().disable_count--; } @@ -731,7 +708,6 @@ private: // For emulation std::shared_ptr<Common::Fiber> host_context{}; - bool is_single_core{}; // For debugging std::vector<KSynchronizationObject*> wait_objects_for_debugging; @@ -776,16 +752,4 @@ public: } }; -class KScopedDisableDispatch { -public: - [[nodiscard]] explicit KScopedDisableDispatch(KernelCore& kernel_) : kernel{kernel_} { - GetCurrentThread(kernel).DisableDispatch(); - } - - ~KScopedDisableDispatch(); - -private: - KernelCore& kernel; -}; - } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 8fdab44e4..bea945301 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -85,9 +85,8 @@ struct KernelCore::Impl { } void InitializeCores() { - for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { - cores[core_id].Initialize(current_process->Is64BitProcess()); - system.Memory().SetCurrentPageTable(*current_process, core_id); + for (auto& core : cores) { + core.Initialize(current_process->Is64BitProcess()); } } @@ -132,6 +131,15 @@ struct KernelCore::Impl { next_user_process_id = KProcess::ProcessIDMin; next_thread_id = 1; + for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { + if (suspend_threads[core_id]) { + suspend_threads[core_id]->Close(); + suspend_threads[core_id] = nullptr; + } + + schedulers[core_id].reset(); + } + cores.clear(); global_handle_table->Finalize(); @@ -159,16 +167,6 @@ struct KernelCore::Impl { CleanupObject(time_shared_mem); CleanupObject(system_resource_limit); - for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { - if (suspend_threads[core_id]) { - suspend_threads[core_id]->Close(); - suspend_threads[core_id] = nullptr; - } - - schedulers[core_id]->Finalize(); - schedulers[core_id].reset(); - } - // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others next_host_thread_id = Core::Hardware::NUM_CPU_CORES; @@ -259,6 +257,14 @@ struct KernelCore::Impl { void MakeCurrentProcess(KProcess* process) { current_process = process; + if (process == nullptr) { + return; + } + + const u32 core_id = GetCurrentHostThreadID(); + if (core_id < Core::Hardware::NUM_CPU_CORES) { + system.Memory().SetCurrentPageTable(*process, core_id); + } } static inline thread_local u32 host_thread_id = UINT32_MAX; @@ -821,20 +827,16 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { return impl->cores[id]; } -size_t KernelCore::CurrentPhysicalCoreIndex() const { - const u32 core_id = impl->GetCurrentHostThreadID(); - if (core_id >= Core::Hardware::NUM_CPU_CORES) { - return Core::Hardware::NUM_CPU_CORES - 1; - } - return core_id; -} - Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() { - return impl->cores[CurrentPhysicalCoreIndex()]; + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; } const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { - return impl->cores[CurrentPhysicalCoreIndex()]; + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; } Kernel::KScheduler* KernelCore::CurrentScheduler() { @@ -1027,9 +1029,6 @@ void KernelCore::Suspend(bool in_suspention) { impl->suspend_threads[core_id]->SetState(state); impl->suspend_threads[core_id]->SetWaitReasonForDebugging( 
ThreadWaitReasonForDebugging::Suspended); - if (!should_suspend) { - impl->suspend_threads[core_id]->DisableDispatch(); - } } } } @@ -1044,11 +1043,13 @@ void KernelCore::ExceptionalExit() { } void KernelCore::EnterSVCProfile() { - impl->svc_ticks[CurrentPhysicalCoreIndex()] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC)); + std::size_t core = impl->GetCurrentHostThreadID(); + impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC)); } void KernelCore::ExitSVCProfile() { - MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[CurrentPhysicalCoreIndex()]); + std::size_t core = impl->GetCurrentHostThreadID(); + MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); } std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) { diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 57535433b..3a6db0b1c 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -146,9 +146,6 @@ public: /// Gets the an instance of the respective physical CPU core. const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; - /// Gets the current physical core index for the running host thread. - std::size_t CurrentPhysicalCoreIndex() const; - /// Gets the sole instance of the Scheduler at the current running core. Kernel::KScheduler* CurrentScheduler(); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 890c52198..62fb06c45 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -877,7 +877,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, Handle const u64 thread_ticks = current_thread->GetCpuTime(); out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); - } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) { + } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; } diff --git a/src/core/hle/service/ngct/ngct.cpp b/src/core/hle/service/ngct/ngct.cpp new file mode 100644 index 000000000..deb3abb28 --- /dev/null +++ b/src/core/hle/service/ngct/ngct.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included + +#include "common/string_util.h" +#include "core/core.h" +#include "core/hle/ipc_helpers.h" +#include "core/hle/service/ngct/ngct.h" +#include "core/hle/service/service.h" + +namespace Service::NGCT { + +class IService final : public ServiceFramework<IService> { +public: + explicit IService(Core::System& system_) : ServiceFramework{system_, "ngct:u"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "Match"}, + {1, &IService::Filter, "Filter"}, + }; + // clang-format on + + RegisterHandlers(functions); + } + +private: + void Filter(Kernel::HLERequestContext& ctx) { + const auto buffer = ctx.ReadBuffer(); + const auto text = Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>(buffer.data()), buffer.size()); + + LOG_WARNING(Service_NGCT, "(STUBBED) called, text={}", text); + + // Return the same string since we don't censor anything + ctx.WriteBuffer(buffer); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } +}; + +void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { + std::make_shared<IService>(system)->InstallAsService(system.ServiceManager()); +} + +} // namespace Service::NGCT diff 
--git a/src/core/hle/service/ngct/ngct.h b/src/core/hle/service/ngct/ngct.h new file mode 100644 index 000000000..1f2a47b78 --- /dev/null +++ b/src/core/hle/service/ngct/ngct.h @@ -0,0 +1,20 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included + +#pragma once + +namespace Core { +class System; +} + +namespace Service::SM { +class ServiceManager; +} + +namespace Service::NGCT { + +/// Registers all NGCT services with the specified service manager. +void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system); + +} // namespace Service::NGCT diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 0a53c0c81..9decb9290 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -277,37 +277,45 @@ private: void GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_NIFM, "(STUBBED) called"); - const SfNetworkProfileData network_profile_data{ - .ip_setting_data{ - .ip_address_setting{ - .is_automatic{true}, - .current_address{192, 168, 1, 100}, - .subnet_mask{255, 255, 255, 0}, - .gateway{192, 168, 1, 1}, - }, - .dns_setting{ - .is_automatic{true}, - .primary_dns{1, 1, 1, 1}, - .secondary_dns{1, 0, 0, 1}, + const auto net_iface = Network::GetSelectedNetworkInterface(); + + const SfNetworkProfileData network_profile_data = [&net_iface] { + if (!net_iface) { + return SfNetworkProfileData{}; + } + + return SfNetworkProfileData{ + .ip_setting_data{ + .ip_address_setting{ + .is_automatic{true}, + .current_address{Network::TranslateIPv4(net_iface->ip_address)}, + .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)}, + .gateway{Network::TranslateIPv4(net_iface->gateway)}, + }, + .dns_setting{ + .is_automatic{true}, + .primary_dns{1, 1, 1, 1}, + .secondary_dns{1, 0, 0, 1}, + }, + .proxy_setting{ + .enabled{false}, + .port{}, + .proxy_server{}, + .automatic_auth_enabled{}, + .user{}, + .password{}, + }, + .mtu{1500}, }, - .proxy_setting{ - .enabled{false}, - .port{}, - .proxy_server{}, - .automatic_auth_enabled{}, - .user{}, - .password{}, + .uuid{0xdeadbeef, 0xdeadbeef}, + .network_name{"yuzu Network"}, + .wireless_setting_data{ + .ssid_length{12}, + .ssid{"yuzu Network"}, + .passphrase{"yuzupassword"}, }, - .mtu{1500}, - }, - .uuid{0xdeadbeef, 0xdeadbeef}, - .network_name{"yuzu Network"}, - .wireless_setting_data{ - .ssid_length{12}, - .ssid{"yuzu Network"}, - .passphrase{"yuzupassword"}, - }, - }; + }; + }(); ctx.WriteBuffer(network_profile_data); @@ -352,38 +360,33 @@ private: LOG_WARNING(Service_NIFM, "(STUBBED) called"); struct IpConfigInfo { - IpAddressSetting ip_address_setting; - DnsSetting dns_setting; + IpAddressSetting ip_address_setting{}; + DnsSetting dns_setting{}; }; static_assert(sizeof(IpConfigInfo) == sizeof(IpAddressSetting) + sizeof(DnsSetting), "IpConfigInfo has incorrect size."); - IpConfigInfo ip_config_info{ - .ip_address_setting{ - .is_automatic{true}, - .current_address{0, 0, 0, 0}, - .subnet_mask{255, 255, 255, 0}, - .gateway{192, 168, 1, 1}, - }, - .dns_setting{ - .is_automatic{true}, - .primary_dns{1, 1, 1, 1}, - .secondary_dns{1, 0, 0, 1}, - }, - }; + const auto net_iface = Network::GetSelectedNetworkInterface(); - const auto iface = Network::GetSelectedNetworkInterface(); - if (iface) { - ip_config_info.ip_address_setting = - IpAddressSetting{.is_automatic{true}, - .current_address{Network::TranslateIPv4(iface->ip_address)}, - 
.subnet_mask{Network::TranslateIPv4(iface->subnet_mask)}, - .gateway{Network::TranslateIPv4(iface->gateway)}}; + const IpConfigInfo ip_config_info = [&net_iface] { + if (!net_iface) { + return IpConfigInfo{}; + } - } else { - LOG_ERROR(Service_NIFM, - "Couldn't get host network configuration info, using default values"); - } + return IpConfigInfo{ + .ip_address_setting{ + .is_automatic{true}, + .current_address{Network::TranslateIPv4(net_iface->ip_address)}, + .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)}, + .gateway{Network::TranslateIPv4(net_iface->gateway)}, + }, + .dns_setting{ + .is_automatic{true}, + .primary_dns{1, 1, 1, 1}, + .secondary_dns{1, 0, 0, 1}, + }, + }; + }(); IPC::ResponseBuilder rb{ctx, 2 + (sizeof(IpConfigInfo) + 3) / sizeof(u32)}; rb.Push(ResultSuccess); diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index ce6065db2..a33e47d0b 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, const Common::Rectangle<int>& crop_rect) { - VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); + const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", addr, offset, width, height, stride, format); - using PixelFormat = Tegra::FramebufferConfig::PixelFormat; - const Tegra::FramebufferConfig framebuffer{ - addr, offset, width, height, stride, static_cast<PixelFormat>(format), - transform, crop_rect}; + const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); + const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, + stride, pixel_format, transform, crop_rect}; system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 759247eb0..78de3f354 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -42,7 +42,9 @@ struct IGBPBuffer { u32_le index; INSERT_PADDING_WORDS(3); u32_le gpu_buffer_id; - INSERT_PADDING_WORDS(17); + INSERT_PADDING_WORDS(6); + u32_le external_format; + INSERT_PADDING_WORDS(10); u32_le nvmap_handle; u32_le offset; INSERT_PADDING_WORDS(60); diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 00bff8caf..3ead813b0 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -298,7 +298,7 @@ void NVFlinger::Compose() { auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0"); ASSERT(nvdisp); - nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format, + nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format, igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->get().transform, buffer->get().crop_rect); diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index b3e50433b..065133166 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -46,6 +46,7 @@ #include "core/hle/service/ncm/ncm.h" #include 
"core/hle/service/nfc/nfc.h" #include "core/hle/service/nfp/nfp.h" +#include "core/hle/service/ngct/ngct.h" #include "core/hle/service/nifm/nifm.h" #include "core/hle/service/nim/nim.h" #include "core/hle/service/npns/npns.h" @@ -271,6 +272,7 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system NCM::InstallInterfaces(*sm, system); NFC::InstallInterfaces(*sm, system); NFP::InstallInterfaces(*sm, system); + NGCT::InstallInterfaces(*sm, system); NIFM::InstallInterfaces(*sm, system); NIM::InstallInterfaces(*sm, system); NPNS::InstallInterfaces(*sm, system); diff --git a/src/core/network/network_interface.cpp b/src/core/network/network_interface.cpp index cecc9aa11..6811f21b1 100644 --- a/src/core/network/network_interface.cpp +++ b/src/core/network/network_interface.cpp @@ -37,73 +37,73 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { AF_INET, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_INCLUDE_GATEWAYS, nullptr, adapter_addresses.data(), &buf_size); - if (ret == ERROR_BUFFER_OVERFLOW) { - adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1); - } else { + if (ret != ERROR_BUFFER_OVERFLOW) { break; } + + adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1); } - if (ret == NO_ERROR) { - std::vector<NetworkInterface> result; + if (ret != NO_ERROR) { + LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses"); + return {}; + } - for (auto current_address = adapter_addresses.data(); current_address != nullptr; - current_address = current_address->Next) { - if (current_address->FirstUnicastAddress == nullptr || - current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) { - continue; - } + std::vector<NetworkInterface> result; - if (current_address->OperStatus != IfOperStatusUp) { - continue; - } + for (auto current_address = adapter_addresses.data(); current_address != nullptr; + current_address = current_address->Next) { + if (current_address->FirstUnicastAddress == nullptr || + current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) { + continue; + } - const auto ip_addr = Common::BitCast<struct sockaddr_in>( - *current_address->FirstUnicastAddress->Address.lpSockaddr) - .sin_addr; + if (current_address->OperStatus != IfOperStatusUp) { + continue; + } - ULONG mask = 0; - if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength, - &mask) != NO_ERROR) { - LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask"); - continue; - } + const auto ip_addr = Common::BitCast<struct sockaddr_in>( + *current_address->FirstUnicastAddress->Address.lpSockaddr) + .sin_addr; - struct in_addr gateway = {.S_un{.S_addr{0}}}; - if (current_address->FirstGatewayAddress != nullptr && - current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) { - gateway = Common::BitCast<struct sockaddr_in>( - *current_address->FirstGatewayAddress->Address.lpSockaddr) - .sin_addr; - } + ULONG mask = 0; + if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength, + &mask) != NO_ERROR) { + LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask"); + continue; + } - result.push_back(NetworkInterface{ - .name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})}, - .ip_address{ip_addr}, - .subnet_mask = in_addr{.S_un{.S_addr{mask}}}, - .gateway = gateway}); + struct in_addr gateway = {.S_un{.S_addr{0}}}; + if (current_address->FirstGatewayAddress != nullptr && + 
current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) { + gateway = Common::BitCast<struct sockaddr_in>( + *current_address->FirstGatewayAddress->Address.lpSockaddr) + .sin_addr; } - return result; - } else { - LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses"); - return {}; + result.emplace_back(NetworkInterface{ + .name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})}, + .ip_address{ip_addr}, + .subnet_mask = in_addr{.S_un{.S_addr{mask}}}, + .gateway = gateway}); } + + return result; } #else std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { - std::vector<NetworkInterface> result; - struct ifaddrs* ifaddr = nullptr; if (getifaddrs(&ifaddr) != 0) { LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}", std::strerror(errno)); - return result; + return {}; } + std::vector<NetworkInterface> result; + for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) { if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) { continue; @@ -117,55 +117,62 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { continue; } - std::uint32_t gateway{0}; + u32 gateway{}; + std::ifstream file{"/proc/net/route"}; - if (file.is_open()) { + if (!file.is_open()) { + LOG_ERROR(Network, "Failed to open \"/proc/net/route\""); - // ignore header - file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); + result.emplace_back(NetworkInterface{ + .name{ifa->ifa_name}, + .ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr}, + .subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr}, + .gateway{in_addr{.s_addr = gateway}}}); + continue; + } - bool gateway_found = false; + // ignore header + file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); - for (std::string line; std::getline(file, line);) { - std::istringstream iss{line}; + bool gateway_found = false; - std::string iface_name{}; - iss >> iface_name; - if (iface_name != ifa->ifa_name) { - continue; - } + for (std::string line; std::getline(file, line);) { + std::istringstream iss{line}; - iss >> std::hex; + std::string iface_name; + iss >> iface_name; + if (iface_name != ifa->ifa_name) { + continue; + } - std::uint32_t dest{0}; - iss >> dest; - if (dest != 0) { - // not the default route - continue; - } + iss >> std::hex; - iss >> gateway; + u32 dest{}; + iss >> dest; + if (dest != 0) { + // not the default route + continue; + } - std::uint16_t flags{0}; - iss >> flags; + iss >> gateway; - // flag RTF_GATEWAY (defined in <linux/route.h>) - if ((flags & 0x2) == 0) { - continue; - } + u16 flags{}; + iss >> flags; - gateway_found = true; - break; + // flag RTF_GATEWAY (defined in <linux/route.h>) + if ((flags & 0x2) == 0) { + continue; } - if (!gateway_found) { - gateway = 0; - } - } else { - LOG_ERROR(Network, "Failed to open \"/proc/net/route\""); + gateway_found = true; + break; } - result.push_back(NetworkInterface{ + if (!gateway_found) { + gateway = 0; + } + + result.emplace_back(NetworkInterface{ .name{ifa->ifa_name}, .ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr}, .subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr}, @@ -180,11 +187,11 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { #endif std::optional<NetworkInterface> GetSelectedNetworkInterface() { - const std::string& selected_network_interface = Settings::values.network_interface.GetValue(); + const auto& selected_network_interface = 
Settings::values.network_interface.GetValue(); const auto network_interfaces = Network::GetAvailableNetworkInterfaces(); if (network_interfaces.size() == 0) { LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces"); - return {}; + return std::nullopt; } const auto res = @@ -192,12 +199,12 @@ std::optional<NetworkInterface> GetSelectedNetworkInterface() { return iface.name == selected_network_interface; }); - if (res != network_interfaces.end()) { - return *res; - } else { + if (res == network_interfaces.end()) { LOG_ERROR(Network, "Couldn't find selected interface \"{}\"", selected_network_interface); - return {}; + return std::nullopt; } + + return *res; } } // namespace Network diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,8 @@ namespace Shader::Backend::SPIRV { namespace { +constexpr size_t NUM_FIXEDFNCTEXTURE = 10; + enum class Operation { Increment, Decrement, @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { return pointer_type; } } + +size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, + size_t start_offset) { + for (size_t location = start_offset; location < used_locations.size(); ++location) { + if (!used_locations.test(location)) { + return location; + } + } + throw RuntimeError("Unable to get an unused location for legacy attribute"); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { const AttributeType input_type{runtime_info.generic_input_types[index]}; if (!runtime_info.previous_stage_stores.Generic(index)) { @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { if (input_type == AttributeType::Disabled) { continue; } + used_locations.set(index); const Id type{GetAttributeType(*this, input_type)}; const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { break; } } + size_t previous_unused_location = 0; + if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_fixed_fnc_textures[index] = id; + } + } if (stage == Stage::TessellationEval) { for (size_t index = 0; index < info.uses_patches.size(); 
++index) { if (!info.uses_patches[index]) { @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); + used_locations.set(index); + } + } + size_t previous_unused_location = 0; + if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); + output_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, location); + output_fixed_fnc_textures[index] = id; } } switch (stage) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -268,10 +268,14 @@ public: Id write_global_func_u32x4{}; Id input_position{}; + Id input_front_color{}; + std::array<Id, 10> input_fixed_fnc_textures{}; std::array<Id, 32> input_generics{}; Id output_point_size{}; Id output_position{}; + Id output_front_color{}; + std::array<Id, 10> output_fixed_fnc_textures{}; std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; Id output_tess_level_outer{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 14c77f162..68f360b3c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } } +bool IsFixedFncTexture(IR::Attribute attribute) { + return attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q; +} + +u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; +} + +u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return static_cast<u32>(attribute) % 4u; +} + template <typename... Args> Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... 
args) { if (ctx.stage == Stage::TessellationControl) { @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const u32 element{FixedFncTextureAttributeElement(attr)}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], + element_id); + } switch (attr) { case IR::Attribute::PointSize: return ctx.output_point_size; @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); + } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const Id attr_id{ctx.input_fixed_fnc_textures[index]}; + const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; + return ctx.OpLoad(ctx.F32[1], attr_ptr); + } switch (attr) { case IR::Attribute::PrimitiveId: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); @@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionW: return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, + ctx.Const(element))); + } case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -333,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } case IR::Attribute::FrontFace: - return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), - ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), + ctx.f32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -20,6 
+20,7 @@ #include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { @@ -652,7 +653,7 @@ class TranslatePass { public: TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::AbstractSyntaxList& syntax_list_) + IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); @@ -660,6 +661,9 @@ public: IR::Block& first_block{*syntax_list.front().data.block}; IR::IREmitter ir(first_block, first_block.begin()); ir.Prologue(); + if (uses_demote_to_helper && host_info.needs_demote_reorder) { + DemoteCombinationPass(); + } } private: @@ -809,7 +813,14 @@ private: } case StatementType::Return: { ensure_block(); - IR::IREmitter{*current_block}.Epilogue(); + IR::Block* return_block{block_pool.Create(inst_pool)}; + IR::IREmitter{*return_block}.Epilogue(); + current_block->AddBranch(return_block); + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = return_block; + current_block = nullptr; syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; @@ -824,6 +835,7 @@ private: auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; merge.data.block = demote_block; + uses_demote_to_helper = true; break; } case StatementType::Unreachable: { @@ -855,11 +867,117 @@ private: return block_pool.Create(inst_pool); } + void DemoteCombinationPass() { + using Type = IR::AbstractSyntaxNode::Type; + std::vector<IR::Block*> demote_blocks; + std::vector<IR::U1> demote_conds; + u32 num_epilogues{}; + u32 branch_depth{}; + for (const IR::AbstractSyntaxNode& node : syntax_list) { + if (node.type == Type::If) { + ++branch_depth; + } + if (node.type == Type::EndIf) { + --branch_depth; + } + if (node.type != Type::Block) { + continue; + } + if (branch_depth > 1) { + // Skip reordering nested demote branches. + continue; + } + for (const IR::Inst& inst : node.data.block->Instructions()) { + const IR::Opcode op{inst.GetOpcode()}; + if (op == IR::Opcode::DemoteToHelperInvocation) { + demote_blocks.push_back(node.data.block); + break; + } + if (op == IR::Opcode::Epilogue) { + ++num_epilogues; + } + } + } + if (demote_blocks.size() == 0) { + return; + } + if (num_epilogues > 1) { + LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); + return; + } + s64 last_iterator_offset{}; + auto& asl{syntax_list}; + for (const IR::Block* demote_block : demote_blocks) { + const auto start_it{asl.begin() + last_iterator_offset}; + auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::If && asn.data.if_node.body == demote_block; + })}; + if (asl_it == asl.end()) { + // Demote without a conditional branch. + // No need to proceed since all fragment instances will be demoted regardless. 
+ return; + } + const IR::Block* const end_if = asl_it->data.if_node.merge; + demote_conds.push_back(asl_it->data.if_node.cond); + last_iterator_offset = std::distance(asl.begin(), asl_it); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::Block && asn.data.block == demote_block; + }); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; + }); + asl_it = asl.erase(asl_it); + } + const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { + if (asn.type != Type::Block) { + return false; + } + for (const auto& inst : asn.data.block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return true; + } + } + return false; + }}; + const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; + const auto return_block_it{(reverse_it + 1).base()}; + + IR::IREmitter ir{*(return_block_it - 1)->data.block}; + IR::U1 cond(IR::Value(false)); + for (const auto& demote_cond : demote_conds) { + cond = ir.LogicalOr(cond, demote_cond); + } + cond.Inst()->DestructiveAddUsage(1); + + IR::AbstractSyntaxNode demote_if_node{}; + demote_if_node.type = Type::If; + demote_if_node.data.if_node.cond = cond; + demote_if_node.data.if_node.body = demote_blocks[0]; + demote_if_node.data.if_node.merge = return_block_it->data.block; + + IR::AbstractSyntaxNode demote_node{}; + demote_node.type = Type::Block; + demote_node.data.block = demote_blocks[0]; + + IR::AbstractSyntaxNode demote_endif_node{}; + demote_endif_node.type = Type::EndIf; + demote_endif_node.data.end_if.merge = return_block_it->data.block; + + asl.insert(return_block_it, demote_endif_node); + asl.insert(return_block_it, demote_node); + asl.insert(return_block_it, demote_if_node); + } + ObjectPool<Statement>& stmt_pool; ObjectPool<IR::Inst>& inst_pool; ObjectPool<IR::Block>& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; + bool uses_demote_to_helper{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -871,12 +989,13 @@ private: } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, + const HostTranslateInfo& host_info) { ObjectPool<Statement> stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; return syntax_list; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,10 +11,13 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" -namespace Shader::Maxwell { +namespace Shader { +struct HostTranslateInfo; +namespace Maxwell { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const 
HostTranslateInfo& host_info); -} // namespace Shader::Maxwell +} // namespace Maxwell +} // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -11,8 +11,9 @@ namespace Shader { /// Misc information about the host struct HostTranslateInfo { - bool support_float16{}; ///< True when the device supports 16-bit floats - bool support_int64{}; ///< True when the device supports 64-bit integers + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers + bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c..be2113f5a 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -261,16 +261,6 @@ public: stream_score += score; } - /// Sets the new frame tick - void SetFrameTick(u64 new_frame_tick) noexcept { - frame_tick = new_frame_tick; - } - - /// Returns the new frame tick - [[nodiscard]] u64 FrameTick() const noexcept { - return frame_tick; - } - /// Returns the likeliness of this being a stream buffer [[nodiscard]] int StreamScore() const noexcept { return stream_score; @@ -307,6 +297,14 @@ public: return words.size_bytes; } + size_t getLRUID() const noexcept { + return lru_id; + } + + void setLRUID(size_t lru_id_) { + lru_id = lru_id_; + } + private: template <Type type> u64* Array() noexcept { @@ -603,9 +601,9 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; Words words; - u64 frame_tick = 0; BufferFlagBits flags{}; int stream_score = 0; + size_t lru_id = SIZE_MAX; }; } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9..7bfd57369 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -330,7 +331,7 @@ private: template <bool insert> void ChangeRegister(BufferId buffer_id); - void TouchBuffer(Buffer& buffer) const noexcept; + void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; bool SynchronizeBuffer(Buffer& buffer, VAddr 
cpu_addr, u32 size); @@ -428,7 +429,11 @@ private: size_t immediate_buffer_capacity = 0; std::unique_ptr<u8[]> immediate_buffer_alloc; - typename SlotVector<Buffer>::Iterator deletion_iterator; + struct LRUItemParams { + using ObjectType = BufferId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; @@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); - deletion_iterator = slot_buffers.end(); common_ranges.clear(); } @@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_buffers.end()) { - deletion_iterator = slot_buffers.begin(); - } - ++deletion_iterator; - if (deletion_iterator == slot_buffers.end()) { - break; - } - const auto [buffer_id, buffer] = *deletion_iterator; - if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { - DownloadBufferMemory(*buffer); - DeleteBuffer(buffer_id); + const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + if (num_iterations == 0) { + return true; } - } + --num_iterations; + auto& buffer = slot_buffers[buffer_id]; + DownloadBufferMemory(buffer); + DeleteBuffer(buffer_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> @@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? 
DEFAULT_SKIP_CACHE_SIZE : 0; - if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { + if (total_used_memory >= EXPECTED_MEMORY) { RunGarbageCollector(); } ++frame_tick; @@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { template <class P> void BufferCache<P>::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, index_buffer.buffer_id); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { const Binding& binding = vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size); @@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { const Binding& binding = storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { const Binding& binding = compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); const u32 size = static_cast<u32>(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); - 
TouchBuffer(slot_buffers[new_buffer_id]); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } Register(new_buffer_id); + TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); return new_buffer_id; } @@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) { template <class P> template <bool insert> void BufferCache<P>::ChangeRegister(BufferId buffer_id) { - const Buffer& buffer = slot_buffers[buffer_id]; + Buffer& buffer = slot_buffers[buffer_id]; const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); + buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); } else { total_used_memory -= Common::AlignUp(size, 1024); + lru_cache.Free(buffer.getLRUID()); } const VAddr cpu_addr_begin = buffer.CpuAddr(); const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { } template <class P> -void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { - buffer.SetFrameTick(frame_tick); +void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { + if (buffer_id != NULL_BUFFER_ID) { + lru_cache.Touch(buffer.getLRUID(), frame_tick); + } } template <class P> diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 70030066a..d7e749485 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); + ASSERT(!current_frame_info.segment_enabled); uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). 
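
The buffer-cache hunks above replace the old per-buffer frame_tick bookkeeping with the shared Common::LeastRecentlyUsedCache introduced by this patch: a buffer is inserted on registration (ChangeRegister stores the slot id via setLRUID), touched on every bind (TouchBuffer), and RunGarbageCollector walks the least recently used entries. The standalone sketch below is not part of the diff; it only illustrates the intended call pattern, and the traits struct, object/tick values, and include paths are assumptions for the example.

```cpp
// Illustrative only: drives Common::LeastRecentlyUsedCache the same way the
// buffer/texture caches in this patch do. Types and values are assumptions.
#include <cstddef>
#include <cstdint>

#include "common/lru_cache.h"

struct ExampleItemParams {
    using ObjectType = std::uint32_t; // stand-in for a BufferId/ImageId-style handle
    using TickType = std::uint64_t;   // frame counter
};

void ExampleGarbageCollection() {
    Common::LeastRecentlyUsedCache<ExampleItemParams> lru;
    std::uint64_t frame_tick = 0;

    // Register: keep the returned slot id (the patch stores it in the
    // buffer/image via setLRUID()/lru_index).
    const std::size_t lru_id = lru.Insert(/*object=*/42u, frame_tick);

    // Touch on every use so the object moves to the most-recently-used end.
    ++frame_tick;
    lru.Touch(lru_id, frame_tick);

    // Garbage collection: visit objects whose last tick is older than the
    // threshold, least recently used first; the callback returns true to
    // request an early stop (as the clean_up lambdas above do when the
    // iteration budget runs out).
    const std::uint64_t ticks_to_destroy = 120;
    const std::uint64_t threshold =
        frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0;
    lru.ForEachItemBelow(threshold, [&](std::uint32_t object) {
        (void)object; // ...download/delete the object here...
        return false; // keep iterating
    });

    // Unregister when the object is destroyed.
    lru.Free(lru_id);
}
```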
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 87eafdb03..3b1ed4b3a 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -22,7 +22,7 @@ struct Vp9FrameDimensions { }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); -enum FrameFlags : u32 { +enum class FrameFlags : u32 { IsKeyFrame = 1 << 0, LastFrameIsKeyFrame = 1 << 1, FrameSizeChanged = 1 << 2, @@ -30,6 +30,7 @@ enum FrameFlags : u32 { LastShowFrame = 1 << 4, IntraOnly = 1 << 5, }; +DECLARE_ENUM_FLAG_OPERATORS(FrameFlags) enum class TxSize { Tx4x4 = 0, // 4x4 transform @@ -92,44 +93,34 @@ struct Vp9EntropyProbs { static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame; - bool intra_only; - bool last_frame_was_key; - bool frame_size_changed; - bool error_resilient_mode; - bool last_frame_shown; - bool show_frame; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; std::array<s8, 4> ref_frame_sign_bias; s32 base_q_index; s32 y_dc_delta_q; s32 uv_dc_delta_q; s32 uv_ac_delta_q; - bool lossless; s32 transform_mode; - bool allow_high_precision_mv; s32 interp_filter; s32 reference_mode; - s8 comp_fixed_ref; - std::array<s8, 2> comp_var_ref; s32 log2_tile_cols; s32 log2_tile_rows; - bool segment_enabled; - bool segment_map_update; - bool segment_map_temporal_update; - s32 segment_abs_delta; - std::array<u32, 8> segment_feature_enable; - std::array<std::array<s16, 4>, 8> segment_feature_data; - bool mode_ref_delta_enabled; - bool use_prev_in_find_mv_refs; std::array<s8, 4> ref_deltas; std::array<s8, 2> mode_deltas; Vp9EntropyProbs entropy; Vp9FrameDimensions frame_size; u8 first_level; u8 sharpness_level; - u32 bitstream_size; - std::array<u64, 4> frame_offsets; - std::array<bool, 4> refresh_frame; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + bool lossless; + bool allow_high_precision_mv; + bool segment_enabled; + bool mode_ref_delta_enabled; }; struct Vp9FrameContainer { @@ -145,7 +136,7 @@ struct PictureInfo { Vp9FrameDimensions golden_frame_size; ///< 0x50 Vp9FrameDimensions alt_frame_size; ///< 0x58 Vp9FrameDimensions current_frame_size; ///< 0x60 - u32 vp9_flags; ///< 0x68 + FrameFlags vp9_flags; ///< 0x68 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C u8 first_level; ///< 0x70 u8 sharpness_level; ///< 0x71 @@ -158,60 +149,43 @@ struct PictureInfo { u8 allow_high_precision_mv; ///< 0x78 u8 interp_filter; ///< 0x79 u8 reference_mode; ///< 0x7A - s8 comp_fixed_ref; ///< 0x7B - std::array<s8, 2> comp_var_ref; ///< 0x7C + INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B u8 log2_tile_cols; ///< 0x7E u8 log2_tile_rows; ///< 0x7F Segmentation segmentation; ///< 0x80 LoopFilter loop_filter; ///< 0xE4 - INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB - u32 surface_params; ///< 0xF0 - INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 + INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB [[nodiscard]] Vp9PictureInfo Convert() const { return { - .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, - .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, - .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, - .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, - .error_resilient_mode = 
(vp9_flags & FrameFlags::ErrorResilientMode) != 0, - .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, - .show_frame = true, + .bitstream_size = bitstream_size, + .frame_offsets{}, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, .uv_dc_delta_q = uv_dc_delta_q, .uv_ac_delta_q = uv_ac_delta_q, - .lossless = lossless != 0, .transform_mode = tx_mode, - .allow_high_precision_mv = allow_high_precision_mv != 0, .interp_filter = interp_filter, .reference_mode = reference_mode, - .comp_fixed_ref = comp_fixed_ref, - .comp_var_ref = comp_var_ref, .log2_tile_cols = log2_tile_cols, .log2_tile_rows = log2_tile_rows, - .segment_enabled = segmentation.enabled != 0, - .segment_map_update = segmentation.update_map != 0, - .segment_map_temporal_update = segmentation.temporal_update != 0, - .segment_abs_delta = segmentation.abs_delta, - .segment_feature_enable = segmentation.feature_mask, - .segment_feature_data = segmentation.feature_data, - .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, - .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && - !(vp9_flags == (FrameFlags::FrameSizeChanged)) && - !(vp9_flags == (FrameFlags::IntraOnly)) && - (vp9_flags == (FrameFlags::LastShowFrame)) && - !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, - .bitstream_size = bitstream_size, - .frame_offsets{}, - .refresh_frame{}, + .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame), + .intra_only = True(vp9_flags & FrameFlags::IntraOnly), + .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame), + .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode), + .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame), + .show_frame = true, + .lossless = lossless != 0, + .allow_high_precision_mv = allow_high_precision_mv != 0, + .segment_enabled = segmentation.enabled != 0, + .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, }; } }; @@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48); ASSERT_POSITION(first_level, 0x70); ASSERT_POSITION(segmentation, 0x80); ASSERT_POSITION(loop_filter, 0xE4); -ASSERT_POSITION(surface_params, 0xF0); #undef ASSERT_POSITION #define ASSERT_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1aa43523a..7f4ca6282 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -475,10 +475,10 @@ public: // These values are used by Nouveau and some games. 
AddGL = 0x8006, - SubtractGL = 0x8007, - ReverseSubtractGL = 0x8008, - MinGL = 0x800a, - MaxGL = 0x800b + MinGL = 0x8007, + MaxGL = 0x8008, + SubtractGL = 0x800a, + ReverseSubtractGL = 0x800b }; enum class Factor : u32 { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee992aed4..de9e41659 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -156,6 +156,10 @@ public: return shader_backend; } + bool IsAmd() const { + return vendor_name == "ATI Technologies Inc."; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1f4dda17e..b0e14182e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo host_info{ .support_float16 = false, .support_int64 = device.HasShaderInt64(), + .needs_demote_reorder = device.IsAmd(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c9b0d6db..9ff0a28cd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { blit_screen.Recreate(); } const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); - scheduler.Flush(render_semaphore); + const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); + scheduler.Flush(render_semaphore, present_semaphore); scheduler.WaitWorker(); swapchain.Present(render_semaphore); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 3a78c9daa..888bc7392 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); - const size_t size_bytes = GetSizeInBytes(framebuffer); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel, + framebuffer.stride, framebuffer.height, + 1, block_height_log2, 0)}; Tegra::Texture::UnswizzleTexture( mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f316c4f92..31bfbcb06 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw host_info = Shader::HostTranslateInfo{ .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), + .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || + 
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, }; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 4840962de..1d438787a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(VkSemaphore semaphore) { - SubmitExecution(semaphore); +void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + SubmitExecution(signal_semaphore, wait_semaphore); AllocateNewContext(); } -void VKScheduler::Finish(VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { const u64 presubmit_tick = CurrentTick(); - SubmitExecution(semaphore); + SubmitExecution(signal_semaphore, wait_semaphore); WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); @@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() { }); } -void VKScheduler::SubmitExecution(VkSemaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { EndPendingOperations(); InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); - Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); - - const u32 num_signal_semaphores = semaphore ? 2U : 1U; - - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + + const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + }; const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), .signalSemaphoreValueCount = num_signal_semaphores, .pSignalSemaphoreValues = signal_values.data(), }; const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1, .pCommandBuffers = cmdbuf.address(), .signalSemaphoreCount = num_signal_semaphores, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cf39a2363..759ed5a48 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -34,10 +34,10 @@ public: ~VKScheduler(); /// Sends the current execution context to the GPU. 
- void Flush(VkSemaphore semaphore = nullptr); + void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(VkSemaphore semaphore = nullptr); + void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -191,7 +191,7 @@ private: void AllocateWorkerCommandBuffer(); - void SubmitExecution(VkSemaphore semaphore); + void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index e5318e52d..aadf03cb0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -106,14 +106,12 @@ void VKSwapchain::AcquireNextImage() { } void VKSwapchain::Present(VkSemaphore render_semaphore) { - const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; - const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreCount = render_semaphore ? 2U : 1U, - .pWaitSemaphores = semaphores.data(), + .waitSemaphoreCount = render_semaphore ? 1U : 0U, + .pWaitSemaphores = &render_semaphore, .swapchainCount = 1, .pSwapchains = swapchain.address(), .pImageIndices = &image_index, diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index cd472dd0a..5bce41e21 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -72,6 +72,10 @@ public: return image_view_format; } + VkSemaphore CurrentPresentSemaphore() const { + return *present_semaphores[frame_index]; + } + private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..0c17a791b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - u64 frame_tick = 0; + size_t lru_index = SIZE_MAX; std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..24b809242 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,7 +5,6 @@ #pragma once #include "common/alignment.h" -#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); - deletion_iterator = slot_images.begin(); - if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 3) / 10; @@ -64,70 +61,38 @@ template <class P> void 
TextureCache<P>::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; + size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); + const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + if (num_iterations == 0) { + return true; } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } + --num_iterations; + auto& image = slot_images[image_id]; + const bool must_download = image.IsSafeDownload(); + if (!high_priority_mode && must_download) { + return false; } - ++deletion_iterator; - } + if (must_download) { + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> void TextureCache<P>::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + if (total_used_memory > minimum_memory) { RunGarbageCollector(); } 
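
Stepping back to the vk_scheduler.cpp change a few hunks above: each submission now waits on the swapchain's image-acquire (binary) semaphore in addition to the scheduler's timeline semaphore, instead of leaving that wait to vkQueuePresentKHR. The sketch below is not part of the diff; it shows the resulting submit shape in isolation. The function name and parameters are illustrative, and it assumes headers providing VK_KHR_timeline_semaphore (or Vulkan 1.2). The key detail is that the wait/signal values paired with binary semaphores are placeholders the driver ignores; only the timeline semaphore's slots carry meaning.

```cpp
// Illustrative only: one queue submission mixing a timeline semaphore with
// binary semaphores on both the wait and signal sides.
#include <array>
#include <cstdint>
#include <vulkan/vulkan.h>

void SubmitFrame(VkQueue queue, VkCommandBuffer cmdbuf, VkSemaphore timeline_semaphore,
                 VkSemaphore acquire_semaphore, VkSemaphore render_semaphore,
                 std::uint64_t signal_value) {
    // One entry per wait semaphore; the value next to the binary semaphore is ignored.
    const std::array<std::uint64_t, 2> wait_values{signal_value - 1, 1};
    const std::array<VkSemaphore, 2> wait_semaphores{timeline_semaphore, acquire_semaphore};
    static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
    };
    // One entry per signal semaphore; again the binary semaphore's value is ignored.
    const std::array<std::uint64_t, 2> signal_values{signal_value, 0};
    const std::array<VkSemaphore, 2> signal_semaphores{timeline_semaphore, render_semaphore};

    const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
        .pNext = nullptr,
        .waitSemaphoreValueCount = static_cast<std::uint32_t>(wait_values.size()),
        .pWaitSemaphoreValues = wait_values.data(),
        .signalSemaphoreValueCount = static_cast<std::uint32_t>(signal_values.size()),
        .pSignalSemaphoreValues = signal_values.data(),
    };
    const VkSubmitInfo submit_info{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext = &timeline_si,
        .waitSemaphoreCount = static_cast<std::uint32_t>(wait_semaphores.size()),
        .pWaitSemaphores = wait_semaphores.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = static_cast<std::uint32_t>(signal_semaphores.size()),
        .pSignalSemaphores = signal_semaphores.data(),
    };
    vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
}
```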
sentenced_images.Tick(); @@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + image.lru_index = lru_cache.Insert(image_id, frame_tick); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { @@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); + lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( u64 page, @@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - image.frame_tick = frame_tick; + lru_cache.Touch(image.lru_index, frame_tick); } template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" @@ -370,6 +371,12 @@ private: std::vector<ImageId> uncommitted_downloads; std::queue<std::vector<ImageId>> committed_downloads; + struct LRUItemParams { + using ObjectType = ImageId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; + static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; @@ -379,7 +386,6 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; - typename SlotVector<Image>::Iterator deletion_iterator; }; } // namespace VideoCommon diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index c010b9353..24e943e4c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 const u32 unswizzled_offset = slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; - if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset); - offset >= input.size()) { - // TODO(Rodrigo): This is an out of bounds access that should never happen. To - // avoid crashing the emulator, break. - ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size()); - break; - } - u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; const u8* const src = &input[TO_LINEAR ? 
unswizzled_offset : swizzled_offset]; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 377795326..85d292bcc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -818,7 +818,6 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); - ReadGlobalSetting(Settings::values.use_caches_gc); ReadGlobalSetting(Settings::values.bg_red); ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); @@ -1359,7 +1358,6 @@ void Config::SaveRendererValues() { Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); - WriteGlobalSetting(Settings::values.use_caches_gc); WriteGlobalSetting(Settings::values.bg_red); WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 099ddbb7c..43f1887d1 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -156,7 +156,7 @@ <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> - <string>Use disk shader cache</string> + <string>Use disk pipeline cache</string> </property> </widget> </item> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a31b8e192..bfd464061 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -28,7 +28,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); - ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); if (Settings::IsConfiguringGlobal()) { @@ -55,8 +54,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc, - use_caches_gc); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, ui->use_fast_gpu_time, use_fast_gpu_time); } @@ -81,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); - ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( Settings::values.max_anisotropy.UsingGlobal()); @@ -94,8 +90,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { use_asynchronous_shaders); ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, Settings::values.use_fast_gpu_time, use_fast_gpu_time); - ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc, - use_caches_gc); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, 
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 7356e6916..13ba4ff6b 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -37,5 +37,4 @@ private: ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; - ConfigurationShared::CheckState use_caches_gc; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 4fe6b86ae..b91abc2f0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,7 +82,7 @@ <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> </property> <property name="text"> - <string>Use asynchronous shader building (hack)</string> + <string>Use asynchronous shader building (Hack)</string> </property> </widget> </item> @@ -92,17 +92,7 @@ <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> </property> <property name="text"> - <string>Use Fast GPU Time (hack)</string> - </property> - </widget> - </item> - <item> - <widget class="QCheckBox" name="use_caches_gc"> - <property name="toolTip"> - <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string> - </property> - <property name="text"> - <string>Enable GPU cache garbage collection (experimental)</string> + <string>Use Fast GPU Time (Hack)</string> </property> </widget> </item> diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index e97804220..f9d949e75 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location")); QAction* open_transferable_shader_cache = - context_menu.addAction(tr("Open Transferable Shader Cache")); + context_menu.addAction(tr("Open Transferable Pipeline Cache")); context_menu.addSeparator(); QMenu* remove_menu = context_menu.addMenu(tr("Remove")); QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); - QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); - QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); + QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); + QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache")); remove_menu->addSeparator(); - QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); + QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches")); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); 
QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 4f14be524..757dd1ea0 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -468,7 +468,6 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); - ReadSetting("Renderer", Settings::values.use_caches_gc); ReadSetting("Renderer", Settings::values.bg_red); ReadSetting("Renderer", Settings::values.bg_green);
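
One smaller pattern from the vp9_types.h hunk earlier in this diff is worth spelling out: FrameFlags became an enum class, so the raw `(vp9_flags & FrameFlags::IsKeyFrame) != 0` tests were rewritten as `True(vp9_flags & FrameFlags::IsKeyFrame)` using DECLARE_ENUM_FLAG_OPERATORS. The sketch below is a minimal, self-contained stand-in for that machinery (yuzu gets the real versions from its common headers); it only illustrates why the enum-class form still reads naturally at the call sites.

```cpp
// Minimal stand-in for the flag helpers used by the vp9_types.h hunk.
// Illustration only; not yuzu's actual common_funcs.h.
#include <type_traits>

enum class FrameFlags : unsigned {
    IsKeyFrame = 1u << 0,
    LastFrameIsKeyFrame = 1u << 1,
    IntraOnly = 1u << 5,
};

// What DECLARE_ENUM_FLAG_OPERATORS(FrameFlags) provides, in spirit:
constexpr FrameFlags operator&(FrameFlags lhs, FrameFlags rhs) {
    using T = std::underlying_type_t<FrameFlags>;
    return static_cast<FrameFlags>(static_cast<T>(lhs) & static_cast<T>(rhs));
}

// True(x): "is any bit set?", avoiding an explicit cast at every call site.
constexpr bool True(FrameFlags flags) {
    return static_cast<std::underlying_type_t<FrameFlags>>(flags) != 0;
}

constexpr bool IsKeyFrame(FrameFlags vp9_flags) {
    // Equivalent to the old `(vp9_flags & FrameFlags::IsKeyFrame) != 0` on the
    // plain enum, but type-safe now that FrameFlags is an enum class.
    return True(vp9_flags & FrameFlags::IsKeyFrame);
}

static_assert(IsKeyFrame(FrameFlags::IsKeyFrame));
static_assert(!IsKeyFrame(FrameFlags::IntraOnly));
```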