diff options
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/common/container_hash.h | 92 | ||||
-rw-r--r-- | src/common/telemetry.cpp | 1 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.cpp | 1 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.h | 1 | ||||
-rw-r--r-- | src/common/x64/cpu_wait.cpp | 69 | ||||
-rw-r--r-- | src/common/x64/cpu_wait.h | 10 | ||||
-rw-r--r-- | src/common/x64/native_clock.cpp | 13 |
8 files changed, 182 insertions, 8 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 90805babe..13ed68b3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -38,6 +38,7 @@ add_library(common STATIC common_precompiled_headers.h common_types.h concepts.h + container_hash.h demangle.cpp demangle.h div_ceil.h @@ -159,6 +160,8 @@ if(ARCHITECTURE_x86_64) PRIVATE x64/cpu_detect.cpp x64/cpu_detect.h + x64/cpu_wait.cpp + x64/cpu_wait.h x64/native_clock.cpp x64/native_clock.h x64/xbyak_abi.h diff --git a/src/common/container_hash.h b/src/common/container_hash.h new file mode 100644 index 000000000..a5e357745 --- /dev/null +++ b/src/common/container_hash.h @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: 2005-2014 Daniel James +// SPDX-FileCopyrightText: 2016 Austin Appleby +// SPDX-License-Identifier: BSL-1.0 + +#include <array> +#include <climits> +#include <cstdint> +#include <limits> +#include <type_traits> +#include <vector> + +namespace Common { + +namespace detail { + +template <typename T> + requires std::is_unsigned_v<T> +inline std::size_t HashValue(T val) { + const unsigned int size_t_bits = std::numeric_limits<std::size_t>::digits; + const unsigned int length = + (std::numeric_limits<T>::digits - 1) / static_cast<unsigned int>(size_t_bits); + + std::size_t seed = 0; + + for (unsigned int i = length * size_t_bits; i > 0; i -= size_t_bits) { + seed ^= static_cast<size_t>(val >> i) + (seed << 6) + (seed >> 2); + } + + seed ^= static_cast<size_t>(val) + (seed << 6) + (seed >> 2); + + return seed; +} + +template <size_t Bits> +struct HashCombineImpl { + template <typename T> + static inline T fn(T seed, T value) { + seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } +}; + +template <> +struct HashCombineImpl<64> { + static inline std::uint64_t fn(std::uint64_t h, std::uint64_t k) { + const std::uint64_t m = (std::uint64_t(0xc6a4a793) << 32) + 0x5bd1e995; + const int r = 47; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + + // Completely arbitrary number, to prevent 0's + // from hashing to 0. + h += 0xe6546b64; + + return h; + } +}; + +} // namespace detail + +template <typename T> +inline void HashCombine(std::size_t& seed, const T& v) { + seed = detail::HashCombineImpl<sizeof(std::size_t) * CHAR_BIT>::fn(seed, detail::HashValue(v)); +} + +template <typename It> +inline std::size_t HashRange(It first, It last) { + std::size_t seed = 0; + + for (; first != last; ++first) { + HashCombine<typename std::iterator_traits<It>::value_type>(seed, *first); + } + + return seed; +} + +template <typename T, size_t Size> +std::size_t HashValue(const std::array<T, Size>& v) { + return HashRange(v.cbegin(), v.cend()); +} + +template <typename T, typename Allocator> +std::size_t HashValue(const std::vector<T, Allocator>& v) { + return HashRange(v.cbegin(), v.cend()); +} + +} // namespace Common diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index d26394359..91352912d 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -97,6 +97,7 @@ void AppendCPUInfo(FieldCollection& fc) { add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); add_field("CPU_Extension_x64_POPCNT", caps.popcnt); add_field("CPU_Extension_x64_SHA", caps.sha); + add_field("CPU_Extension_x64_WAITPKG", caps.waitpkg); #else fc.AddField(FieldType::UserSystem, "CPU_Model", "Other"); #endif diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index e54383a4a..72ed6e96c 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -144,6 +144,7 @@ static CPUCaps Detect() { caps.bmi2 = Common::Bit<8>(cpu_id[1]); caps.sha = Common::Bit<29>(cpu_id[1]); + caps.waitpkg = Common::Bit<5>(cpu_id[2]); caps.gfni = Common::Bit<8>(cpu_id[2]); __cpuidex(cpu_id, 0x00000007, 0x00000001); diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index ca8db19d6..8253944d6 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -67,6 +67,7 @@ struct CPUCaps { bool pclmulqdq : 1; bool popcnt : 1; bool sha : 1; + bool waitpkg : 1; }; /** diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp new file mode 100644 index 000000000..cfeef6a3d --- /dev/null +++ b/src/common/x64/cpu_wait.cpp @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <thread> + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#include "common/x64/cpu_detect.h" +#include "common/x64/cpu_wait.h" + +namespace Common::X64 { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} + +__forceinline static void TPAUSE() { + // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. + // For reference: + // At 1 GHz, 100K cycles is 100us + // At 2 GHz, 100K cycles is 50us + // At 4 GHz, 100K cycles is 25us + static constexpr auto PauseCycles = 100'000; + _tpause(0, FencedRDTSC() + PauseCycles); +} +#else +static u64 FencedRDTSC() { + u64 eax; + u64 edx; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; +} + +static void TPAUSE() { + // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. + // For reference: + // At 1 GHz, 100K cycles is 100us + // At 2 GHz, 100K cycles is 50us + // At 4 GHz, 100K cycles is 25us + static constexpr auto PauseCycles = 100'000; + const auto tsc = FencedRDTSC() + PauseCycles; + const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); + const auto edx = static_cast<u32>(tsc >> 32); + asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); +} +#endif + +void MicroSleep() { + static const bool has_waitpkg = GetCPUCaps().waitpkg; + + if (has_waitpkg) { + TPAUSE(); + } else { + std::this_thread::yield(); + } +} + +} // namespace Common::X64 diff --git a/src/common/x64/cpu_wait.h b/src/common/x64/cpu_wait.h new file mode 100644 index 000000000..99d3757a7 --- /dev/null +++ b/src/common/x64/cpu_wait.h @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +namespace Common::X64 { + +void MicroSleep(); + +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 76c66e7ee..277b00662 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -27,16 +27,13 @@ __forceinline static u64 FencedRDTSC() { } #else static u64 FencedRDTSC() { - u64 result; + u64 eax; + u64 edx; asm volatile("lfence\n\t" "rdtsc\n\t" - "shl $32, %%rdx\n\t" - "or %%rdx, %0\n\t" - "lfence" - : "=a"(result) - : - : "rdx", "memory", "cc"); - return result; + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; } #endif |