From f5e32935ca9d1727624c86ca78aff91027caf819 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 28 Mar 2020 15:23:28 -0400 Subject: SingleCore: Use Cycle Timing instead of Host Timing. --- src/core/arm/arm_interface.h | 6 +++-- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 37 +++++++++++++++++++--------- src/core/arm/dynarmic/arm_dynarmic_32.h | 2 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 39 +++++++++++++++++++---------- src/core/arm/dynarmic/arm_dynarmic_64.h | 2 +- src/core/arm/unicorn/arm_unicorn.cpp | 6 ++--- src/core/arm/unicorn/arm_unicorn.h | 4 +-- src/core/core_timing.cpp | 41 +++++++++++++++++++++++++------ src/core/core_timing.h | 14 ++++++++--- src/core/core_timing_util.cpp | 29 +++++++++++++++------- src/core/core_timing_util.h | 15 +++-------- src/core/cpu_manager.cpp | 18 +++++++------- src/core/cpu_manager.h | 2 +- src/core/hle/kernel/svc.cpp | 5 ++++ src/core/hle/kernel/thread.cpp | 12 ++++++--- 15 files changed, 152 insertions(+), 80 deletions(-) diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index e5c484336..fbdce4134 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -26,8 +26,9 @@ using CPUInterrupts = std::arrayticks -= ticks; + if (parent.uses_wall_clock) { + return; + } + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = + (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max(amortized_ticks, 1); + + parent.system.CoreTiming().AddTicks(amortized_ticks); + num_interpreted_instructions = 0; } u64 GetTicksRemaining() override { - if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return std::max(ticks, 0); + if (parent.uses_wall_clock) { + if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { + return std::max(1000U, 0); + } + return 0ULL; } - return 0ULL; - } - - void ResetTicks() { - ticks = 1000LL; + return std::max(parent.system.CoreTiming().GetDowncount(), 0LL); } ARM_Dynarmic_32& parent; std::size_t num_interpreted_instructions{}; - s64 ticks{}; }; std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, @@ -103,7 +115,6 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable& } void ARM_Dynarmic_32::Run() { - cb->ResetTicks(); jit->Run(); } @@ -112,8 +123,10 @@ void ARM_Dynarmic_32::Step() { } ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, - ExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ARM_Interface{system, interrupt_handlers}, cb(std::make_unique(*this)), + bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor, + std::size_t core_index) + : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, + cb(std::make_unique(*this)), cp15(std::make_shared(*this)), core_index{core_index}, exclusive_monitor{dynamic_cast(exclusive_monitor)} {} diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index bea4933c8..8afd15c8b 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -29,7 +29,7 @@ class System; class ARM_Dynarmic_32 final : public ARM_Interface { public: - ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, + ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ARM_Dynarmic_32() override; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 03b3313cf..a518733b6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -124,29 +124,41 @@ public: } void AddTicks(u64 ticks) override { - this->ticks -= ticks; + if (parent.uses_wall_clock) { + return; + } + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = + (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max(amortized_ticks, 1); + + parent.system.CoreTiming().AddTicks(amortized_ticks); + num_interpreted_instructions = 0; } u64 GetTicksRemaining() override { - if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return std::max(ticks, 0); + if (parent.uses_wall_clock) { + if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { + return std::max(1000U, 0); + } + return 0ULL; } - return 0ULL; + return std::max(parent.system.CoreTiming().GetDowncount(), 0LL); } u64 GetCNTPCT() override { return parent.system.CoreTiming().GetClockTicks(); } - void ResetTicks() { - ticks = 1000LL; - } - ARM_Dynarmic_64& parent; std::size_t num_interpreted_instructions = 0; u64 tpidrro_el0 = 0; u64 tpidr_el0 = 0; - s64 ticks{}; }; std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, @@ -185,13 +197,12 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable& } // CNTPCT uses wall clock. - config.wall_clock_cntpct = true; + config.wall_clock_cntpct = uses_wall_clock; return std::make_shared(config); } void ARM_Dynarmic_64::Run() { - cb->ResetTicks(); jit->Run(); } @@ -200,9 +211,11 @@ void ARM_Dynarmic_64::Step() { } ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, - ExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ARM_Interface{system, interrupt_handler}, + bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor, + std::size_t core_index) + : ARM_Interface{system, interrupt_handler, uses_wall_clock}, cb(std::make_unique(*this)), inner_unicorn{system, interrupt_handler, + uses_wall_clock, ARM_Unicorn::Arch::AArch64, core_index}, core_index{core_index}, exclusive_monitor{ diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index c26b47249..31ec16521 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -28,7 +28,7 @@ class System; class ARM_Dynarmic_64 final : public ARM_Interface { public: - ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, + ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ARM_Dynarmic_64() override; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 099229c8d..1cb71942b 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -63,9 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si return false; } -ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture, - std::size_t core_index) - : ARM_Interface{system, interrupt_handler}, core_index{core_index} { +ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, + bool uses_wall_clock, Arch architecture, std::size_t core_index) + : ARM_Interface{system, interrupt_handler, uses_wall_clock}, core_index{core_index} { const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64; CHECKED(uc_open(arch, UC_MODE_ARM, &uc)); diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index f09b24a85..a01751e65 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -20,8 +20,8 @@ public: AArch64, // 64-bit ARM }; - explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture, - std::size_t core_index); + explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, + bool uses_wall_clock, Arch architecture, std::size_t core_index); ~ARM_Unicorn() override; void SetPC(u64 pc) override; diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 189d4aa34..12e9e60a4 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -14,6 +14,8 @@ namespace Core::Timing { +constexpr u64 MAX_SLICE_LENGTH = 4000; + std::shared_ptr CreateEvent(std::string name, TimedCallback&& callback) { return std::make_shared(std::move(callback), std::move(name)); } @@ -53,6 +55,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) { void CoreTiming::Initialize(std::function&& on_thread_init_) { on_thread_init = std::move(on_thread_init_); event_fifo_id = 0; + ticks = 0; const auto empty_timed_callback = [](u64, s64) {}; ev_lost = CreateEvent("_lost_event", empty_timed_callback); if (is_multicore) { @@ -126,20 +129,36 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u basic_lock.unlock(); } -void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) { - ticks_count[core_index] += ticks; +void CoreTiming::AddTicks(u64 ticks) { + this->ticks += ticks; + downcount -= ticks; } -void CoreTiming::ResetTicks(std::size_t core_index) { - ticks_count[core_index] = 0; +void CoreTiming::Idle() { + if (!event_queue.empty()) { + u64 next_event_time = event_queue.front().time; + ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U; + return; + } + ticks += 1000U; +} + +void CoreTiming::ResetTicks() { + downcount = MAX_SLICE_LENGTH; } u64 CoreTiming::GetCPUTicks() const { - return clock->GetCPUCycles(); + if (is_multicore) { + return clock->GetCPUCycles(); + } + return ticks; } u64 CoreTiming::GetClockTicks() const { - return clock->GetClockCycles(); + if (is_multicore) { + return clock->GetClockCycles(); + } + return CpuCyclesToClockCycles(ticks); } void CoreTiming::ClearPendingEvents() { @@ -217,11 +236,17 @@ void CoreTiming::ThreadLoop() { } std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { - return clock->GetTimeNS(); + if (is_multicore) { + return clock->GetTimeNS(); + } + return CyclesToNs(ticks); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - return clock->GetTimeUS(); + if (is_multicore) { + return clock->GetTimeUS(); + } + return CyclesToUs(ticks); } } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 03f9a5c76..ed5de9b97 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -98,9 +98,15 @@ public: /// We only permit one event of each type in the queue at a time. void RemoveEvent(const std::shared_ptr& event_type); - void AddTicks(std::size_t core_index, u64 ticks); + void AddTicks(u64 ticks); - void ResetTicks(std::size_t core_index); + void ResetTicks(); + + void Idle(); + + s64 GetDowncount() const { + return downcount; + } /// Returns current time in emulated CPU cycles u64 GetCPUTicks() const; @@ -154,7 +160,9 @@ private: bool is_multicore{}; - std::array, Core::Hardware::NUM_CPU_CORES> ticks_count{}; + /// Cycle timing + u64 ticks{}; + s64 downcount{}; }; /// Creates a core timing event with the given name and callback. diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index be34b26fe..aefc63663 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -38,15 +38,8 @@ s64 usToCycles(std::chrono::microseconds us) { } s64 nsToCycles(std::chrono::nanoseconds ns) { - if (static_cast(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) { - LOG_ERROR(Core_Timing, "Integer overflow, use max value"); - return std::numeric_limits::max(); - } - if (static_cast(ns.count()) > MAX_VALUE_TO_MULTIPLY) { - LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000); - } - return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000; + const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE); + return Common::Divide128On32(temporal, static_cast(1000000000)).first; } u64 msToClockCycles(std::chrono::milliseconds ns) { @@ -69,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) { return Common::Divide128On32(temporal, static_cast(Hardware::BASE_CLOCK_RATE)).first; } +std::chrono::milliseconds CyclesToMs(s64 cycles) { + const u128 temporal = Common::Multiply64Into128(cycles, 1000); + u64 ms = Common::Divide128On32(temporal, static_cast(Hardware::BASE_CLOCK_RATE)).first; + return std::chrono::milliseconds(ms); +} + +std::chrono::nanoseconds CyclesToNs(s64 cycles) { + const u128 temporal = Common::Multiply64Into128(cycles, 1000000000); + u64 ns = Common::Divide128On32(temporal, static_cast(Hardware::BASE_CLOCK_RATE)).first; + return std::chrono::nanoseconds(ns); +} + +std::chrono::microseconds CyclesToUs(s64 cycles) { + const u128 temporal = Common::Multiply64Into128(cycles, 1000000); + u64 us = Common::Divide128On32(temporal, static_cast(Hardware::BASE_CLOCK_RATE)).first; + return std::chrono::microseconds(us); +} + } // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index b3c58447d..2ed979e14 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -16,18 +16,9 @@ s64 nsToCycles(std::chrono::nanoseconds ns); u64 msToClockCycles(std::chrono::milliseconds ns); u64 usToClockCycles(std::chrono::microseconds ns); u64 nsToClockCycles(std::chrono::nanoseconds ns); - -inline std::chrono::milliseconds CyclesToMs(s64 cycles) { - return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE); -} - -inline std::chrono::nanoseconds CyclesToNs(s64 cycles) { - return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE); -} - -inline std::chrono::microseconds CyclesToUs(s64 cycles) { - return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE); -} +std::chrono::milliseconds CyclesToMs(s64 cycles); +std::chrono::nanoseconds CyclesToNs(s64 cycles); +std::chrono::microseconds CyclesToUs(s64 cycles); u64 CpuCyclesToClockCycles(u64 ticks); diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 2e9dc9dc3..604405060 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -232,13 +232,10 @@ void CpuManager::SingleCoreRunGuestLoop() { auto* physical_core = &kernel.CurrentPhysicalCore(); auto& arm_interface = thread->ArmInterface(); system.EnterDynarmicProfile(); - while (!physical_core->IsInterrupted()) { + if (!physical_core->IsInterrupted()) { + system.CoreTiming().ResetTicks(); arm_interface.Run(); physical_core = &kernel.CurrentPhysicalCore(); - preemption_count++; - if (preemption_count % max_cycle_runs == 0) { - break; - } } system.ExitDynarmicProfile(); thread->SetPhantomMode(true); @@ -255,7 +252,7 @@ void CpuManager::SingleCoreRunIdleThread() { auto& kernel = system.Kernel(); while (true) { auto& physical_core = kernel.CurrentPhysicalCore(); - PreemptSingleCore(); + PreemptSingleCore(false); idle_count++; auto& scheduler = physical_core.Scheduler(); scheduler.TryDoContextSwitch(); @@ -279,12 +276,15 @@ void CpuManager::SingleCoreRunSuspendThread() { } } -void CpuManager::PreemptSingleCore() { - preemption_count = 0; +void CpuManager::PreemptSingleCore(bool from_running_enviroment) { std::size_t old_core = current_core; auto& scheduler = system.Kernel().Scheduler(old_core); Kernel::Thread* current_thread = scheduler.GetCurrentThread(); - if (idle_count >= 4) { + if (idle_count >= 4 || from_running_enviroment) { + if (!from_running_enviroment) { + system.CoreTiming().Idle(); + idle_count = 0; + } current_thread->SetPhantomMode(true); system.CoreTiming().Advance(); current_thread->SetPhantomMode(false); diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index e6b8612f0..ae55d6427 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -55,7 +55,7 @@ public: std::function GetSuspendThreadStartFunc(); void* GetStartFuncParamater(); - void PreemptSingleCore(); + void PreemptSingleCore(bool from_running_enviroment = true); std::size_t CurrentCore() const { return current_core.load(); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 599972211..c47fa9167 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1534,6 +1534,7 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { if (is_redundant && !system.Kernel().IsMulticore()) { system.Kernel().ExitSVCProfile(); + system.CoreTiming().AddTicks(1000U); system.GetCpuManager().PreemptSingleCore(); system.Kernel().EnterSVCProfile(); } @@ -1762,6 +1763,10 @@ static u64 GetSystemTick(Core::System& system) { // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) const u64 result{system.CoreTiming().GetClockTicks()}; + if (!system.Kernel().IsMulticore()) { + core_timing.AddTicks(400U); + } + return result; } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 65fedfc9b..d88039a16 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -246,19 +246,23 @@ ResultVal> Thread::Create(Core::System& system, ThreadTy #ifdef ARCHITECTURE_x86_64 if (owner_process && !owner_process->Is64BitProcess()) { thread->arm_interface = std::make_unique( - system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id); + system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(), + processor_id); } else { thread->arm_interface = std::make_unique( - system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id); + system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(), + processor_id); } #else if (owner_process && !owner_process->Is64BitProcess()) { thread->arm_interface = std::make_shared( - system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch32, processor_id); + system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32, + processor_id); } else { thread->arm_interface = std::make_shared( - system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch64, processor_id); + system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64, + processor_id); } LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); #endif -- cgit v1.2.3