diff options
author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2021-01-01 23:28:55 +0100 |
---|---|---|
committer | Fernando Sahmkow <fsahmkow27@gmail.com> | 2021-01-02 00:43:47 +0100 |
commit | d4f871cb6a616f6b1a8a76049e042118571f3dd3 (patch) | |
tree | 0dc75cf49ce8240fb282b522dc2989eb8894b979 /src | |
parent | Merge pull request #5271 from MerryMage/rm-mem-Special (diff) | |
download | yuzu-d4f871cb6a616f6b1a8a76049e042118571f3dd3.tar yuzu-d4f871cb6a616f6b1a8a76049e042118571f3dd3.tar.gz yuzu-d4f871cb6a616f6b1a8a76049e042118571f3dd3.tar.bz2 yuzu-d4f871cb6a616f6b1a8a76049e042118571f3dd3.tar.lz yuzu-d4f871cb6a616f6b1a8a76049e042118571f3dd3.tar.xz yuzu-d4f871cb6a616f6b1a8a76049e042118571f3dd3.tar.zst yuzu-d4f871cb6a616f6b1a8a76049e042118571f3dd3.zip |
Diffstat (limited to 'src')
-rw-r--r-- | src/common/x64/native_clock.cpp | 69 | ||||
-rw-r--r-- | src/common/x64/native_clock.h | 7 |
2 files changed, 71 insertions, 5 deletions
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index eb8a7782f..e246432d0 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -2,12 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <chrono> +#include <limits> #include <mutex> #include <thread> #ifdef _MSC_VER #include <intrin.h> + +#pragma intrinsic(__umulh) +#pragma intrinsic(_udiv128) #else #include <x86intrin.h> #endif @@ -15,6 +20,55 @@ #include "common/uint128.h" #include "common/x64/native_clock.h" +namespace { + +[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { +#ifdef __SIZEOF_INT128__ + const auto base = static_cast<unsigned __int128>(numerator) << 64ULL; + return static_cast<u64>(base / divisor); +#elif defined(_M_X64) || defined(_M_ARM64) + std::array<u64, 2> r = {0, numerator}; + u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], divisor, &remainder); +#else + return _udiv128(r[1], r[0], divisor, &remainder); +#endif +#else + // This one is bit more inaccurate. + return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor); +#endif +} + +[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) { +#ifdef __SIZEOF_INT128__ + return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64; +#elif defined(_M_X64) || defined(_M_ARM64) + return __umulh(a, b); // MSVC +#else + // Generic fallback + const u64 a_lo = u32(a); + const u64 a_hi = a >> 32; + const u64 b_lo = u32(b); + const u64 b_hi = b >> 32; + + const u64 a_x_b_hi = a_hi * b_hi; + const u64 a_x_b_mid = a_hi * b_lo; + const u64 b_x_a_mid = b_hi * a_lo; + const u64 a_x_b_lo = a_lo * b_lo; + + const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) + + static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >> + 32; + + const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return multhi; +#endif +} + +} // namespace + namespace Common { u64 EstimateRDTSCFrequency() { @@ -50,6 +104,11 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen _mm_mfence(); last_measure = __rdtsc(); accumulated_ticks = 0U; + ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); + us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); + ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); + clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); + cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); } u64 NativeClock::GetRTSC() { @@ -75,27 +134,27 @@ void NativeClock::Pause(bool is_paused) { std::chrono::nanoseconds NativeClock::GetTimeNS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; + return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; } std::chrono::microseconds NativeClock::GetTimeUS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; + return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; } std::chrono::milliseconds NativeClock::GetTimeMS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; + return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; } u64 NativeClock::GetClockCycles() { const u64 rtsc_value = GetRTSC(); - return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); + return MultiplyHigh(rtsc_value, clock_rtsc_factor); } u64 NativeClock::GetCPUCycles() { const u64 rtsc_value = GetRTSC(); - return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); + return MultiplyHigh(rtsc_value, cpu_rtsc_factor); } } // namespace X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 6d1e32ac8..a7b1ee9e0 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -41,6 +41,13 @@ private: u64 last_measure{}; u64 accumulated_ticks{}; u64 rtsc_frequency; + + // factors + u64 ns_rtsc_factor{}; + u64 us_rtsc_factor{}; + u64 ms_rtsc_factor{}; + u64 clock_rtsc_factor{}; + u64 cpu_rtsc_factor{}; }; } // namespace X64 |