From c31382ced54c07650ae41fa2f75dc53da894784e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 11 Feb 2020 16:02:41 -0300 Subject: query_cache: Abstract OpenGL implementation Abstract the current OpenGL implementation into the VideoCommon namespace and reimplement it on top of that. Doing this avoids repeating code and logic in the Vulkan implementation. --- src/video_core/query_cache.h | 323 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 323 insertions(+) create mode 100644 src/video_core/query_cache.h (limited to 'src/video_core/query_cache.h') diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..4c9151ce8 --- /dev/null +++ b/src/video_core/query_cache.h @@ -0,0 +1,323 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template +class CounterStreamBase { +public: + explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) + : cache{cache}, type{type} {} + + /// Updates the state of the stream, enabling or disabling as needed. + void Update(bool enabled) { + if (enabled) { + Enable(); + } else { + Disable(); + } + } + + /// Resets the stream to zero. It doesn't disable the query after resetting. + void Reset() { + if (current) { + current->EndQuery(); + + // Immediately start a new query to avoid disabling its state. + current = cache.Counter(nullptr, type); + } + last = nullptr; + } + + /// Returns the current counter slicing as needed. + std::shared_ptr Current() { + if (!current) { + return nullptr; + } + current->EndQuery(); + last = std::move(current); + current = cache.Counter(last, type); + return last; + } + + /// Returns true when the counter stream is enabled. + bool IsEnabled() const { + return static_cast(current); + } + +private: + /// Enables the stream. + void Enable() { + if (current) { + return; + } + current = cache.Counter(last, type); + } + + // Disables the stream. + void Disable() { + if (current) { + current->EndQuery(); + } + last = std::exchange(current, nullptr); + } + + QueryCache& cache; + const VideoCore::QueryType type; + + std::shared_ptr current; + std::shared_ptr last; +}; + +template +class QueryCacheBase { +public: + explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ + static_cast(*this), + VideoCore::QueryType::SamplesPassed}}} {} + + void InvalidateRegion(CacheAddr addr, std::size_t size) { + FlushAndRemoveRegion(addr, size); + } + + void FlushRegion(CacheAddr addr, std::size_t size) { + FlushAndRemoveRegion(addr, size); + } + + /** + * Records a query in GPU mapped memory, potentially marked with a timestamp. + * @param gpu_addr GPU address to flush to when the mapped memory is read. + * @param type Query type, e.g. SamplesPassed. + * @param timestamp Timestamp, when empty the flushed query is assumed to be short. + */ + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) { + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + } + + query->BindCounter(Stream(type).Current(), timestamp); + } + + /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. + void UpdateCounters() { + const auto& regs = system.GPU().Maxwell3D().regs; + Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); + } + + /// Resets a counter to zero. It doesn't disable the query after resetting. + void ResetCounter(VideoCore::QueryType type) { + Stream(type).Reset(); + } + + /// Returns a new host counter. + std::shared_ptr Counter(std::shared_ptr dependency, + VideoCore::QueryType type) { + return std::make_shared(static_cast(*this), std::move(dependency), + type); + } + + /// Returns the counter stream of the specified type. + CounterStream& Stream(VideoCore::QueryType type) { + return streams[static_cast(type)]; + } + +private: + /// Flushes a memory range to guest memory and removes it from the cache. + void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + const u64 addr_begin = static_cast(addr); + const u64 addr_end = addr_begin + static_cast(size); + const auto in_range = [addr_begin, addr_end](CachedQuery& query) { + const u64 cache_begin = query.CacheAddr(); + const u64 cache_end = cache_begin + query.SizeInBytes(); + return cache_begin < addr_end && addr_begin < cache_end; + }; + + const u64 page_end = addr_end >> PAGE_SHIFT; + for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query)) { + continue; + } + rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + query.Flush(); + } + contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), + std::end(contents)); + } + } + + /// Registers the passed parameters as cached and returns a pointer to the stored cached query. + CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { + rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); + const u64 page = static_cast(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + return &cached_queries[page].emplace_back(static_cast(*this), type, cpu_addr, + host_ptr); + } + + /// Tries to a get a cached query. Returns nullptr on failure. + CachedQuery* TryGet(CacheAddr addr) { + const u64 page = static_cast(addr) >> PAGE_SHIFT; + const auto it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + return nullptr; + } + auto& contents = it->second; + const auto found = std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.CacheAddr() == addr; }); + return found != std::end(contents) ? &*found : nullptr; + } + + static constexpr std::uintptr_t PAGE_SIZE = 4096; + static constexpr int PAGE_SHIFT = 12; + + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; + + std::unordered_map> cached_queries; + + std::array streams; +}; + +template +class HostCounterBase { +public: + explicit HostCounterBase(std::shared_ptr dependency) + : dependency{std::move(dependency)} {} + + /// Returns the current value of the query. + u64 Query() { + if (result) { + return *result; + } + + u64 value = BlockingQuery(); + if (dependency) { + value += dependency->Query(); + } + + return *(result = value); + } + + /// Returns true when flushing this query will potentially wait. + bool WaitPending() const noexcept { + return result.has_value(); + } + +protected: + /// Returns the value of query from the backend API blocking as needed. + virtual u64 BlockingQuery() const = 0; + +private: + std::shared_ptr dependency; ///< Counter to add to this value. + std::optional result; ///< Filled with the already returned value. +}; + +template +class CachedQueryBase { +public: + explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) + : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + + CachedQueryBase(CachedQueryBase&& rhs) noexcept + : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)}, + timestamp{rhs.timestamp} {} + + CachedQueryBase(const CachedQueryBase&) = delete; + + CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept { + cpu_addr = rhs.cpu_addr; + host_ptr = rhs.host_ptr; + counter = std::move(rhs.counter); + timestamp = rhs.timestamp; + return *this; + } + + /// Flushes the query to guest memory. + virtual void Flush() { + // When counter is nullptr it means that it's just been reseted. We are supposed to write a + // zero in these cases. + const u64 value = counter ? counter->Query() : 0; + std::memcpy(host_ptr, &value, sizeof(u64)); + + if (timestamp) { + std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); + } + } + + /// Binds a counter to this query. + void BindCounter(std::shared_ptr counter_, std::optional timestamp_) { + if (counter) { + // If there's an old counter set it means the query is being rewritten by the game. + // To avoid losing the data forever, flush here. + Flush(); + } + counter = std::move(counter_); + timestamp = timestamp_; + } + + VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + CacheAddr CacheAddr() const noexcept { + return ToCacheAddr(host_ptr); + } + + u64 SizeInBytes() const noexcept { + return SizeInBytes(timestamp.has_value()); + } + + static u64 SizeInBytes(bool with_timestamp) { + return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; + } + +protected: + /// Returns true when querying the counter may potentially block. + bool WaitPending() const noexcept { + return counter && counter->WaitPending(); + } + +private: + static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. + static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. + static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. + + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr counter; ///< Host counter to query, owns the dependency tree. + std::optional timestamp; ///< Timestamp to flush to guest memory. +}; + +} // namespace VideoCommon -- cgit v1.2.3 From bcd348f2388cf944f2ac49364a8d13b47cc21456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 11 Feb 2020 18:59:44 -0300 Subject: vk_query_cache: Implement generic query cache on Vulkan --- src/video_core/query_cache.h | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'src/video_core/query_cache.h') diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 4c9151ce8..069032121 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -88,7 +88,8 @@ private: std::shared_ptr last; }; -template +template class QueryCacheBase { public: explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) @@ -127,15 +128,25 @@ public: /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. void UpdateCounters() { + std::unique_lock lock{mutex}; const auto& regs = system.GPU().Maxwell3D().regs; Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); } /// Resets a counter to zero. It doesn't disable the query after resetting. void ResetCounter(VideoCore::QueryType type) { + std::unique_lock lock{mutex}; Stream(type).Reset(); } + /// Disable all active streams. Expected to be called at the end of a command buffer. + void DisableStreams() { + std::unique_lock lock{mutex}; + for (auto& stream : streams) { + stream.Update(false); + } + } + /// Returns a new host counter. std::shared_ptr Counter(std::shared_ptr dependency, VideoCore::QueryType type) { @@ -148,6 +159,9 @@ public: return streams[static_cast(type)]; } +protected: + std::array query_pools; + private: /// Flushes a memory range to guest memory and removes it from the cache. void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { @@ -213,8 +227,16 @@ private: template class HostCounterBase { public: - explicit HostCounterBase(std::shared_ptr dependency) - : dependency{std::move(dependency)} {} + explicit HostCounterBase(std::shared_ptr dependency_) + : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { + // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. + static constexpr u64 depth_threshold = 96; + if (depth > depth_threshold) { + depth = 0; + base_result = dependency->Query(); + dependency = nullptr; + } + } /// Returns the current value of the query. u64 Query() { @@ -222,9 +244,10 @@ public: return *result; } - u64 value = BlockingQuery(); + u64 value = BlockingQuery() + base_result; if (dependency) { value += dependency->Query(); + dependency = nullptr; } return *(result = value); @@ -235,6 +258,10 @@ public: return result.has_value(); } + u64 Depth() const noexcept { + return depth; + } + protected: /// Returns the value of query from the backend API blocking as needed. virtual u64 BlockingQuery() const = 0; @@ -242,6 +269,8 @@ protected: private: std::shared_ptr dependency; ///< Counter to add to this value. std::optional result; ///< Filled with the already returned value. + u64 depth; ///< Number of nested dependencies. + u64 base_result = 0; ///< Equivalent to nested dependencies value. }; template -- cgit v1.2.3 From cc0694559fd7084221540d595edb228f8267a96c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Feb 2020 14:28:22 -0300 Subject: query_cache: Add a recursive mutex for concurrent usage --- src/video_core/query_cache.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/video_core/query_cache.h') diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 069032121..86f5aade1 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -98,10 +99,12 @@ public: VideoCore::QueryType::SamplesPassed}}} {} void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } void FlushRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } @@ -112,6 +115,7 @@ public: * @param timestamp Timestamp, when empty the flushed query is assumed to be short. */ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) { + std::unique_lock lock{mutex}; auto& memory_manager = system.GPU().MemoryManager(); const auto host_ptr = memory_manager.GetPointer(gpu_addr); @@ -219,6 +223,8 @@ private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; + std::recursive_mutex mutex; + std::unordered_map> cached_queries; std::array streams; -- cgit v1.2.3 From 54a00ee4cfdc9f9cd0985d10c4f3a822a284d997 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Feb 2020 15:31:37 -0300 Subject: query_cache: Fix ambiguity in CacheAddr getter --- src/video_core/query_cache.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/video_core/query_cache.h') diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 86f5aade1..a040858e8 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -172,7 +172,7 @@ private: const u64 addr_begin = static_cast(addr); const u64 addr_end = addr_begin + static_cast(size); const auto in_range = [addr_begin, addr_end](CachedQuery& query) { - const u64 cache_begin = query.CacheAddr(); + const u64 cache_begin = query.GetCacheAddr(); const u64 cache_end = cache_begin + query.SizeInBytes(); return cache_begin < addr_end && addr_begin < cache_end; }; @@ -212,8 +212,9 @@ private: return nullptr; } auto& contents = it->second; - const auto found = std::find_if(std::begin(contents), std::end(contents), - [addr](auto& query) { return query.CacheAddr() == addr; }); + const auto found = + std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCacheAddr() == addr; }); return found != std::end(contents) ? &*found : nullptr; } @@ -326,7 +327,7 @@ public: return cpu_addr; } - CacheAddr CacheAddr() const noexcept { + CacheAddr GetCacheAddr() const noexcept { return ToCacheAddr(host_ptr); } -- cgit v1.2.3 From 6d3a046caa894997946736d3466855682d770d45 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 13 Feb 2020 21:11:21 -0300 Subject: query_cache: Address feedback --- src/video_core/query_cache.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'src/video_core/query_cache.h') diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index a040858e8..e66054ed0 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -62,7 +62,7 @@ public: /// Returns true when the counter stream is enabled. bool IsEnabled() const { - return static_cast(current); + return current != nullptr; } private: @@ -163,6 +163,11 @@ public: return streams[static_cast(type)]; } + /// Returns the counter stream of the specified type. + const CounterStream& Stream(VideoCore::QueryType type) const { + return streams[static_cast(type)]; + } + protected: std::array query_pools; @@ -219,7 +224,7 @@ private: } static constexpr std::uintptr_t PAGE_SIZE = 4096; - static constexpr int PAGE_SHIFT = 12; + static constexpr unsigned PAGE_SHIFT = 12; Core::System& system; VideoCore::RasterizerInterface& rasterizer; @@ -237,13 +242,14 @@ public: explicit HostCounterBase(std::shared_ptr dependency_) : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. - static constexpr u64 depth_threshold = 96; + constexpr u64 depth_threshold = 96; if (depth > depth_threshold) { depth = 0; base_result = dependency->Query(); dependency = nullptr; } } + virtual ~HostCounterBase() = default; /// Returns the current value of the query. u64 Query() { @@ -257,7 +263,8 @@ public: dependency = nullptr; } - return *(result = value); + result = value; + return *result; } /// Returns true when flushing this query will potentially wait. @@ -285,20 +292,13 @@ class CachedQueryBase { public: explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + virtual ~CachedQueryBase() = default; - CachedQueryBase(CachedQueryBase&& rhs) noexcept - : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)}, - timestamp{rhs.timestamp} {} - + CachedQueryBase(CachedQueryBase&&) noexcept = default; CachedQueryBase(const CachedQueryBase&) = delete; - CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept { - cpu_addr = rhs.cpu_addr; - host_ptr = rhs.host_ptr; - counter = std::move(rhs.counter); - timestamp = rhs.timestamp; - return *this; - } + CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default; + CachedQueryBase& operator=(const CachedQueryBase&) = delete; /// Flushes the query to guest memory. virtual void Flush() { @@ -335,7 +335,7 @@ public: return SizeInBytes(timestamp.has_value()); } - static u64 SizeInBytes(bool with_timestamp) { + static constexpr u64 SizeInBytes(bool with_timestamp) noexcept { return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; } -- cgit v1.2.3