diff options
author | Morph <39850852+Morph1984@users.noreply.github.com> | 2023-04-23 05:08:28 +0200 |
---|---|---|
committer | Morph <39850852+Morph1984@users.noreply.github.com> | 2023-06-08 03:44:42 +0200 |
commit | 1492a65454d6a03f641b136cc61e68870be00218 (patch) | |
tree | 5442ef0505fb075ee329a1a3cbb1be831987295e | |
parent | x64: Deduplicate RDTSC usage (diff) | |
download | yuzu-1492a65454d6a03f641b136cc61e68870be00218.tar yuzu-1492a65454d6a03f641b136cc61e68870be00218.tar.gz yuzu-1492a65454d6a03f641b136cc61e68870be00218.tar.bz2 yuzu-1492a65454d6a03f641b136cc61e68870be00218.tar.lz yuzu-1492a65454d6a03f641b136cc61e68870be00218.tar.xz yuzu-1492a65454d6a03f641b136cc61e68870be00218.tar.zst yuzu-1492a65454d6a03f641b136cc61e68870be00218.zip |
-rw-r--r-- | src/common/steady_clock.cpp | 5 | ||||
-rw-r--r-- | src/common/wall_clock.cpp | 73 | ||||
-rw-r--r-- | src/common/wall_clock.h | 54 | ||||
-rw-r--r-- | src/common/x64/native_clock.cpp | 165 | ||||
-rw-r--r-- | src/common/x64/native_clock.h | 56 |
5 files changed, 94 insertions, 259 deletions
diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. static constexpr s64 Multiplier = 100; // Convert Windows epoch to Unix epoch. - static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; + static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; FILETIME filetime; GetSystemTimePreciseAsFileTime(&filetime); return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + - static_cast<s64>(filetime.dwLowDateTime)) - - WindowsEpochToUnixEpochNS; + static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); } #endif diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..ad8db06b0 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -2,88 +2,71 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/steady_clock.h" -#include "common/uint128.h" #include "common/wall_clock.h" #ifdef ARCHITECTURE_x86_64 #include "common/x64/cpu_detect.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" #endif namespace Common { class StandardWallClock final : public WallClock { public: - explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, - start_time{SteadyClock::Now()} {} + explicit StandardWallClock() : start_time{SteadyClock::Now()} {} - std::chrono::nanoseconds GetTimeNS() override { + std::chrono::nanoseconds GetTimeNS() const override { return SteadyClock::Now() - start_time; } - std::chrono::microseconds GetTimeUS() override { - return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); + std::chrono::microseconds GetTimeUS() const override { + return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den); } - std::chrono::milliseconds GetTimeMS() override { - return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); + std::chrono::milliseconds GetTimeMS() const override { + return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den); } - u64 GetClockCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetCNTPCT() const override { + return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } - u64 GetCPUCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetHostTicksNow() const override { + return static_cast<u64>(SteadyClock::Now().time_since_epoch().count()); } - void Pause([[maybe_unused]] bool is_paused) override { - // Do nothing in this clock type. + u64 GetHostTicksElapsed() const override { + return static_cast<u64>(GetTimeNS().count()); + } + + bool IsNative() const override { + return false; } private: SteadyClock::time_point start_time; }; +std::unique_ptr<WallClock> CreateOptimalClock() { #ifdef ARCHITECTURE_x86_64 - -std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); - u64 rtsc_frequency = 0; - if (caps.invariant_tsc) { - rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency(); - } - // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: - // - A nanosecond - // - The emulated CPU frequency - // - The emulated clock counter frequency (CNTFRQ) - if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || - rtsc_frequency <= emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, - emulated_clock_frequency); + if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::CNTFRQ) { + return std::make_unique<X64::NativeClock>(caps.tsc_frequency); } else { - return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, - rtsc_frequency); + // Fallback to StandardWallClock if the hardware TSC + // - Is not invariant + // - Is not more precise than CNTFRQ + return std::make_unique<StandardWallClock>(); } -} - #else - -std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); -} - + return std::make_unique<StandardWallClock>(); #endif +} -std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); +std::unique_ptr<WallClock> CreateStandardWallClock() { + return std::make_unique<StandardWallClock>(); } } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..a73e6e644 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -5,6 +5,7 @@ #include <chrono> #include <memory> +#include <ratio> #include "common/common_types.h" @@ -12,50 +13,43 @@ namespace Common { class WallClock { public: - static constexpr u64 NS_RATIO = 1'000'000'000; - static constexpr u64 US_RATIO = 1'000'000; - static constexpr u64 MS_RATIO = 1'000; + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz virtual ~WallClock() = default; - /// Returns current wall time in nanoseconds - [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; + /// @returns The time in nanoseconds since the construction of this clock. + virtual std::chrono::nanoseconds GetTimeNS() const = 0; - /// Returns current wall time in microseconds - [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; + /// @returns The time in microseconds since the construction of this clock. + virtual std::chrono::microseconds GetTimeUS() const = 0; - /// Returns current wall time in milliseconds - [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; + /// @returns The time in milliseconds since the construction of this clock. + virtual std::chrono::milliseconds GetTimeMS() const = 0; - /// Returns current wall time in emulated clock cycles - [[nodiscard]] virtual u64 GetClockCycles() = 0; + /// @returns The guest CNTPCT ticks since the construction of this clock. + virtual u64 GetCNTPCT() const = 0; - /// Returns current wall time in emulated cpu cycles - [[nodiscard]] virtual u64 GetCPUCycles() = 0; + /// @returns The raw host timer ticks since an indeterminate epoch. + virtual u64 GetHostTicksNow() const = 0; - virtual void Pause(bool is_paused) = 0; + /// @returns The raw host timer ticks since the construction of this clock. + virtual u64 GetHostTicksElapsed() const = 0; - /// Tells if the wall clock, uses the host CPU's hardware clock - [[nodiscard]] bool IsNative() const { - return is_native; - } + /// @returns Whether the clock directly uses the host's hardware clock. + virtual bool IsNative() const = 0; protected: - explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) - : emulated_cpu_frequency{emulated_cpu_frequency_}, - emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} + using NsRatio = std::nano; + using UsRatio = std::micro; + using MsRatio = std::milli; - u64 emulated_cpu_frequency; - u64 emulated_clock_frequency; - -private: - bool is_native; + using NsToUsRatio = std::ratio_divide<std::nano, std::micro>; + using NsToMsRatio = std::ratio_divide<std::nano, std::milli>; + using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>; }; -[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr<WallClock> CreateOptimalClock(); -[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr<WallClock> CreateStandardWallClock(); } // namespace Common diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..5d1eb0590 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -1,164 +1,45 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <array> -#include <chrono> -#include <thread> - -#include "common/atomic_ops.h" -#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" -#ifdef _MSC_VER -#include <intrin.h> -#endif +namespace Common::X64 { -namespace Common { +NativeClock::NativeClock(u64 rdtsc_frequency_) + : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, + ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, + us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, + ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, + cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)} {} -#ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} -#else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; +std::chrono::nanoseconds NativeClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; } -#endif -template <u64 Nearest> -static u64 RoundToNearest(u64 value) { - const auto mod = value % Nearest; - return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +std::chrono::microseconds NativeClock::GetTimeUS() const { + return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; } -u64 EstimateRDTSCFrequency() { - // Discard the first result measuring the rdtsc. - FencedRDTSC(); - std::this_thread::sleep_for(std::chrono::milliseconds{1}); - FencedRDTSC(); - - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 250 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{250}); - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast<u64>( - std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return RoundToNearest<1000>(tsc_freq); +std::chrono::milliseconds NativeClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; } -namespace X64 { -NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ - rtsc_frequency_} { - // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. - time_sync_thread = std::jthread{[this](std::stop_token token) { - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 10 seconds. - if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { - return; - } - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast<u64>( - std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - rtsc_frequency = tsc_freq; - CalculateAndSetFactors(); - }}; - - time_point.inner.last_measure = FencedRDTSC(); - time_point.inner.accumulated_ticks = 0U; - CalculateAndSetFactors(); +u64 NativeClock::GetCNTPCT() const { + return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); } -u64 NativeClock::GetRTSC() { - TimePoint new_time_point{}; - TimePoint current_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - const u64 current_measure = FencedRDTSC(); - u64 diff = current_measure - current_time_point.inner.last_measure; - diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) - new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure - ? current_measure - : current_time_point.inner.last_measure; - new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - return new_time_point.inner.accumulated_ticks; +u64 NativeClock::GetHostTicksNow() const { + return FencedRDTSC(); } -void NativeClock::Pause(bool is_paused) { - if (!is_paused) { - TimePoint current_time_point{}; - TimePoint new_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - new_time_point.pack = current_time_point.pack; - new_time_point.inner.last_measure = FencedRDTSC(); - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - } -} - -std::chrono::nanoseconds NativeClock::GetTimeNS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; +u64 NativeClock::GetHostTicksElapsed() const { + return FencedRDTSC() - start_ticks; } -std::chrono::microseconds NativeClock::GetTimeUS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; +bool NativeClock::IsNative() const { + return true; } -std::chrono::milliseconds NativeClock::GetTimeMS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; -} - -u64 NativeClock::GetClockCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, clock_rtsc_factor); -} - -u64 NativeClock::GetCPUCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, cpu_rtsc_factor); -} - -void NativeClock::CalculateAndSetFactors() { - ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); - us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); - ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); - clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); - cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); -} - -} // namespace X64 - -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..d6f8626c1 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -3,58 +3,36 @@ #pragma once -#include "common/polyfill_thread.h" #include "common/wall_clock.h" -namespace Common { +namespace Common::X64 { -namespace X64 { class NativeClock final : public WallClock { public: - explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_); + explicit NativeClock(u64 rdtsc_frequency_); - std::chrono::nanoseconds GetTimeNS() override; + std::chrono::nanoseconds GetTimeNS() const override; - std::chrono::microseconds GetTimeUS() override; + std::chrono::microseconds GetTimeUS() const override; - std::chrono::milliseconds GetTimeMS() override; + std::chrono::milliseconds GetTimeMS() const override; - u64 GetClockCycles() override; + u64 GetCNTPCT() const override; - u64 GetCPUCycles() override; + u64 GetHostTicksNow() const override; - void Pause(bool is_paused) override; + u64 GetHostTicksElapsed() const override; -private: - u64 GetRTSC(); - - void CalculateAndSetFactors(); - - union alignas(16) TimePoint { - TimePoint() : pack{} {} - u128 pack{}; - struct Inner { - u64 last_measure{}; - u64 accumulated_ticks{}; - } inner; - }; - - TimePoint time_point; + bool IsNative() const override; - // factors - u64 clock_rtsc_factor{}; - u64 cpu_rtsc_factor{}; - u64 ns_rtsc_factor{}; - u64 us_rtsc_factor{}; - u64 ms_rtsc_factor{}; - - u64 rtsc_frequency; +private: + u64 start_ticks; + u64 rdtsc_frequency; - std::jthread time_sync_thread; + u64 ns_rdtsc_factor; + u64 us_rdtsc_factor; + u64 ms_rdtsc_factor; + u64 cntpct_rdtsc_factor; }; -} // namespace X64 - -u64 EstimateRDTSCFrequency(); -} // namespace Common +} // namespace Common::X64 |