summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ReinUsesLisp <reinuseslisp@airmail.cc> 2020-02-11 20:02:41 +0100
committer ReinUsesLisp <reinuseslisp@airmail.cc> 2020-02-14 21:38:27 +0100
commit c31382ced54c07650ae41fa2f75dc53da894784e (patch)
tree aeff230f51000fc357bbc2859c5ef66892f9a841
parent gl_query_cache: Optimize query cache (diff)
downloadyuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar.gz
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar.bz2
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar.lz
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar.xz
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar.zst
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.zip
-rw-r--r-- src/video_core/CMakeLists.txt 1
-rw-r--r-- src/video_core/query_cache.h 323
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.cpp 287
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.h 122
4 files changed, 394 insertions, 339 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3208f4993..bb5895e99 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -37,6 +37,7 @@ add_library(video_core STATIC
memory_manager.h
morton.cpp
morton.h
+ query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
rasterizer_cache.cpp
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
new file mode 100644
index 000000000..4c9151ce8
--- /dev/null
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,323 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <iterator>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class QueryCache, class HostCounter>
+class CounterStreamBase { // Stream of chained host counters for a single query type.
+public:
+ explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
+ : cache{cache}, type{type} {} // No counter is created here, so the stream starts disabled.
+
+ /// Updates the state of the stream: enables it when `enabled` is true, disables it otherwise.
+ void Update(bool enabled) {
+ if (enabled) {
+ Enable();
+ } else {
+ Disable();
+ }
+ }
+
+ /// Resets the stream to zero by dropping the dependency chain. An enabled stream stays enabled.
+ void Reset() {
+ if (current) {
+ current->EndQuery();
+
+ // Immediately start a new query, without a dependency, to avoid disabling its state.
+ current = cache.Counter(nullptr, type);
+ }
+ last = nullptr;
+ }
+
+ /// Ends the running counter, starts a new one depending on it and returns the ended one (nullptr when disabled).
+ std::shared_ptr<HostCounter> Current() {
+ if (!current) {
+ return nullptr;
+ }
+ current->EndQuery();
+ last = std::move(current);
+ current = cache.Counter(last, type); // The new counter takes the ended one as its dependency.
+ return last;
+ }
+
+ /// Returns true when the counter stream is enabled, i.e. a counter is currently held.
+ bool IsEnabled() const {
+ return static_cast<bool>(current);
+ }
+
+private:
+ /// Enables the stream by starting a counter that depends on `last`. No-op when already enabled.
+ void Enable() {
+ if (current) {
+ return;
+ }
+ current = cache.Counter(last, type);
+ }
+
+ /// Disables the stream: ends the running counter (if any) and keeps it as `last`.
+ void Disable() {
+ if (current) {
+ current->EndQuery();
+ }
+ last = std::exchange(current, nullptr); // std::exchange is declared in <utility>, not included directly by this header.
+ }
+
+ QueryCache& cache; ///< Cache used to create new host counters.
+ const VideoCore::QueryType type; ///< Query type passed to every counter created by this stream.
+
+ std::shared_ptr<HostCounter> current; ///< Running counter; null while the stream is disabled.
+ std::shared_ptr<HostCounter> last; ///< Previously ended counter, passed as dependency of the next one.
+};
+
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
+class QueryCacheBase { // CRTP base: `QueryCache` is the derived type and is reached through static_cast.
+public:
+ explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+ : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
+ static_cast<QueryCache&>(*this),
+ VideoCore::QueryType::SamplesPassed}}} {} // One stream is built, for SamplesPassed.
+
+ void InvalidateRegion(CacheAddr addr, std::size_t size) { // Handled identically to FlushRegion.
+ FlushAndRemoveRegion(addr, size);
+ }
+
+ void FlushRegion(CacheAddr addr, std::size_t size) { // Flushed queries are also removed from the cache.
+ FlushAndRemoveRegion(addr, size);
+ }
+
+ /**
+ * Records a query in GPU mapped memory, potentially marked with a timestamp.
+ * @param gpu_addr GPU address to flush to when the mapped memory is read.
+ * @param type Query type, e.g. SamplesPassed.
+ * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
+ */
+ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
+ auto& memory_manager = system.GPU().MemoryManager();
+ const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+
+ CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); // Reuse the cached query at this address if any.
+ if (!query) {
+ const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ ASSERT_OR_EXECUTE(cpu_addr, return;);
+
+ query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
+ }
+
+ query->BindCounter(Stream(type).Current(), timestamp); // Current() slices the stream, so the bound counter has ended.
+ }
+
+ /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
+ void UpdateCounters() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
+ }
+
+ /// Resets a counter to zero. It doesn't disable the query after resetting.
+ void ResetCounter(VideoCore::QueryType type) {
+ Stream(type).Reset();
+ }
+
+ /// Returns a new host counter of the given type that accumulates on top of `dependency`.
+ std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type) {
+ return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
+ type);
+ }
+
+ /// Returns the counter stream of the specified type.
+ CounterStream& Stream(VideoCore::QueryType type) {
+ return streams[static_cast<std::size_t>(type)]; // The enum value is used directly as the array index.
+ }
+
+private:
+ /// Flushes a memory range to guest memory and removes it from the cache.
+ void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
+ const u64 addr_begin = static_cast<u64>(addr);
+ const u64 addr_end = addr_begin + static_cast<u64>(size); // Exclusive end of the region.
+ const auto in_range = [addr_begin, addr_end](CachedQuery& query) { // Half-open interval overlap test.
+ const u64 cache_begin = query.CacheAddr();
+ const u64 cache_end = cache_begin + query.SizeInBytes();
+ return cache_begin < addr_end && addr_begin < cache_end;
+ };
+
+ const u64 page_end = addr_end >> PAGE_SHIFT; // Inclusive; a page-aligned addr_end visits one extra page.
+ for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+ const auto& it = cached_queries.find(page);
+ if (it == std::end(cached_queries)) {
+ continue;
+ }
+ auto& contents = it->second;
+ for (auto& query : contents) { // First pass: uncount and flush every overlapping query.
+ if (!in_range(query)) {
+ continue;
+ }
+ rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
+ query.Flush();
+ }
+ contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
+ std::end(contents)); // Second pass: drop the flushed queries; the map entry itself is kept.
+ }
+ }
+
+ /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
+ CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
+ rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); // NOTE(review): removal uncounts with the query's then-current size; confirm both sizes always agree.
+ const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; // Queries are bucketed by the page of their first byte.
+ return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
+ host_ptr);
+ }
+
+ /// Tries to get a cached query whose cache address is exactly `addr`. Returns nullptr on failure.
+ CachedQuery* TryGet(CacheAddr addr) {
+ const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+ const auto it = cached_queries.find(page);
+ if (it == std::end(cached_queries)) {
+ return nullptr;
+ }
+ auto& contents = it->second;
+ const auto found = std::find_if(std::begin(contents), std::end(contents),
+ [addr](auto& query) { return query.CacheAddr() == addr; });
+ return found != std::end(contents) ? &*found : nullptr;
+ }
+
+ static constexpr std::uintptr_t PAGE_SIZE = 4096; // Not referenced anywhere in this class.
+ static constexpr int PAGE_SHIFT = 12; // log2(4096): converts an address into a page index.
+
+ Core::System& system;
+ VideoCore::RasterizerInterface& rasterizer;
+
+ std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; ///< Cached queries keyed by page index.
+
+ std::array<CounterStream, VideoCore::NumQueryTypes> streams; ///< One counter stream per query type.
+};
+
+template <class QueryCache, class HostCounter>
+class HostCounterBase { // Host counter whose value is accumulated on top of an optional dependency.
+public:
+ explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
+ : dependency{std::move(dependency)} {}
+
+ /// Returns the value of the query plus the value of its dependency; cached after the first call.
+ u64 Query() {
+ if (result) {
+ return *result;
+ }
+
+ u64 value = BlockingQuery();
+ if (dependency) {
+ value += dependency->Query();
+ }
+
+ return *(result = value);
+ }
+
+ /// Returns true when flushing this query will potentially wait, i.e. no result is cached yet.
+ bool WaitPending() const noexcept {
+ return !result.has_value(); // A cached result means Query() returns without touching the backend.
+ }
+
+protected:
+ /// Returns the value of query from the backend API blocking as needed.
+ virtual u64 BlockingQuery() const = 0;
+
+private:
+ std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
+ std::optional<u64> result; ///< Filled with the already returned value.
+};
+
+template <class HostCounter>
+class CachedQueryBase { // A query result slot in guest memory together with the counter that fills it.
+public:
+ explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
+ : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} // Starts with no counter and no timestamp.
+
+ CachedQueryBase(CachedQueryBase&& rhs) noexcept
+ : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)},
+ timestamp{rhs.timestamp} {} // Only the counter is moved; the other members are copied.
+
+ CachedQueryBase(const CachedQueryBase&) = delete;
+
+ CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept {
+ cpu_addr = rhs.cpu_addr;
+ host_ptr = rhs.host_ptr;
+ counter = std::move(rhs.counter);
+ timestamp = rhs.timestamp;
+ return *this;
+ }
+
+ /// Flushes the query to guest memory: the counter value first, then the timestamp if present.
+ virtual void Flush() {
+ // When counter is nullptr it means that it has just been reset. We are supposed to write a
+ // zero in these cases.
+ const u64 value = counter ? counter->Query() : 0;
+ std::memcpy(host_ptr, &value, sizeof(u64));
+
+ if (timestamp) {
+ std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+ }
+ }
+
+ /// Binds a counter (and optional timestamp) to this query, flushing any previously bound counter.
+ void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+ if (counter) {
+ // If there's an old counter set it means the query is being rewritten by the game.
+ // To avoid losing the data forever, flush here.
+ Flush();
+ }
+ counter = std::move(counter_);
+ timestamp = timestamp_;
+ }
+
+ VAddr CpuAddr() const noexcept { // Guest CPU address of the query.
+ return cpu_addr;
+ }
+
+ CacheAddr CacheAddr() const noexcept { // Cache address derived from the host pointer.
+ return ToCacheAddr(host_ptr);
+ }
+
+ u64 SizeInBytes() const noexcept { // Size of this query; depends on whether a timestamp is bound.
+ return SizeInBytes(timestamp.has_value());
+ }
+
+ static u64 SizeInBytes(bool with_timestamp) { // 16 bytes with a timestamp, 8 bytes without.
+ return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+ }
+
+protected:
+ /// Returns true when querying the counter may potentially block.
+ bool WaitPending() const noexcept {
+ return counter && counter->WaitPending(); // Without a counter there is nothing to wait for.
+ }
+
+private:
+ static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
+ static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
+ static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
+
+ VAddr cpu_addr; ///< Guest CPU address.
+ u8* host_ptr; ///< Writable host pointer.
+ std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
+ std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 74cb73209..7d5a044c7 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -20,211 +20,49 @@
namespace OpenGL {
-using VideoCore::QueryType;
-
namespace {
-constexpr std::uintptr_t PAGE_SIZE = 4096;
-constexpr int PAGE_SHIFT = 12;
-
-constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp
-constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
-constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
-
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
-constexpr GLenum GetTarget(QueryType type) {
+constexpr GLenum GetTarget(VideoCore::QueryType type) { // Looks up the OpenGL query target of a query type in QueryTargets.
return QueryTargets[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
-CounterStream::CounterStream(QueryCache& cache, QueryType type)
- : cache{cache}, type{type}, target{GetTarget(type)} {}
-
-CounterStream::~CounterStream() = default;
-
-void CounterStream::Update(bool enabled, bool any_command_queued) {
- if (enabled) {
- Enable();
- } else {
- Disable(any_command_queued);
- }
-}
-
-void CounterStream::Reset(bool any_command_queued) {
- if (current) {
- EndQuery(any_command_queued);
-
- // Immediately start a new query to avoid disabling its state.
- current = cache.GetHostCounter(nullptr, type);
- }
- last = nullptr;
-}
-
-std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
- if (!current) {
- return nullptr;
- }
- EndQuery(any_command_queued);
- last = std::move(current);
- current = cache.GetHostCounter(last, type);
- return last;
-}
-
-void CounterStream::Enable() {
- if (current) {
- return;
- }
- current = cache.GetHostCounter(last, type);
-}
-
-void CounterStream::Disable(bool any_command_queued) {
- if (current) {
- EndQuery(any_command_queued);
- }
- last = std::exchange(current, nullptr);
-}
-
-void CounterStream::EndQuery(bool any_command_queued) {
- if (!any_command_queued) {
- // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
- // having any of these causes a lock. glFlush is considered a command, so we can safely wait
- // for this. Insert to the OpenGL command stream a flush.
- glFlush();
- }
- glEndQuery(target);
-}
-
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
- : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
- QueryType::SamplesPassed}}} {}
+QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
+ : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
+ HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
+ gl_rasterizer)}, // The base only sees the generic rasterizer interface.
+ gl_rasterizer{gl_rasterizer} {} // The concrete rasterizer is kept for AnyCommandQueued().
QueryCache::~QueryCache() = default;
-void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) {
- const u64 addr_begin = static_cast<u64>(addr);
- const u64 addr_end = addr_begin + static_cast<u64>(size);
- const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
- const u64 cache_begin = query.GetCacheAddr();
- const u64 cache_end = cache_begin + query.GetSizeInBytes();
- return cache_begin < addr_end && addr_begin < cache_end;
- };
-
- const u64 page_end = addr_end >> PAGE_SHIFT;
- for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
- const auto& it = cached_queries.find(page);
- if (it == std::end(cached_queries)) {
- continue;
- }
- auto& contents = it->second;
- for (auto& query : contents) {
- if (!in_range(query)) {
- continue;
- }
- rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
- Flush(query);
- }
- contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
- std::end(contents));
- }
-}
-
-void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
- // We can handle flushes in the same way as invalidations.
- InvalidateRegion(addr, size);
-}
-
-void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
- auto& memory_manager = system.GPU().MemoryManager();
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-
- CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
- if (!query) {
- const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT_OR_EXECUTE(cpu_addr, return;);
-
- query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
- }
-
- query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
-}
-
-void QueryCache::UpdateCounters() {
- auto& samples_passed = GetStream(QueryType::SamplesPassed);
-
- const auto& regs = system.GPU().Maxwell3D().regs;
- samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
-}
-
-void QueryCache::ResetCounter(QueryType type) {
- GetStream(type).Reset(rasterizer.AnyCommandQueued());
-}
-
-void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
- reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
-}
-
-std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
- QueryType type) {
- auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
+OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { // Returns a query object, reusing a reserved one when available.
+ auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
OGLQuery query;
if (reserve.empty()) {
query.Create(GetTarget(type));
- } else {
- query = std::move(reserve.back());
- reserve.pop_back();
+ return query; // Nothing reserved for this type: hand out the freshly created query.
}
- return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query));
+ query = std::move(reserve.back()); // Reuse the most recently reserved query.
+ reserve.pop_back();
+ return query;
}
-CachedQuery& QueryCache::Register(CachedQuery&& cached_query) {
- const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT;
- auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
- rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
- return stored_ref;
-}
-
-CachedQuery* QueryCache::TryGet(CacheAddr addr) {
- const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
- const auto it = cached_queries.find(page);
- if (it == std::end(cached_queries)) {
- return nullptr;
- }
- auto& contents = it->second;
- const auto found =
- std::find_if(std::begin(contents), std::end(contents),
- [addr](const auto& query) { return query.GetCacheAddr() == addr; });
- return found != std::end(contents) ? &*found : nullptr;
-}
-
-void QueryCache::Flush(CachedQuery& cached_query) {
- auto& stream = GetStream(cached_query.GetType());
-
- // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
- // To avoid this disable and re-enable keeping the dependency stream.
- // But we only have to do this if we have pending waits to be done.
- const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
- const bool any_command_queued = rasterizer.AnyCommandQueued();
- if (slice_counter) {
- stream.Update(false, any_command_queued);
- }
-
- cached_query.Flush();
-
- if (slice_counter) {
- stream.Update(true, any_command_queued);
- }
+void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { // Stores a query object for later reuse by AllocateQuery.
+ queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
}
-CounterStream& QueryCache::GetStream(QueryType type) {
- return streams[static_cast<std::size_t>(type)];
+bool QueryCache::AnyCommandQueued() const noexcept { // Forwards to the OpenGL rasterizer; consulted by HostCounter::EndQuery.
+ return gl_rasterizer.AnyCommandQueued();
}
-HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
- OGLQuery&& query_)
- : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type)
+ : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+ type{type}, query{cache.AllocateQuery(type)} { // The query object comes from the cache's reserve.
glBeginQuery(GetTarget(type), query.handle);
}
@@ -232,81 +70,50 @@ HostCounter::~HostCounter() {
cache.Reserve(type, std::move(query));
}
-u64 HostCounter::Query() {
- if (result) {
- return *result;
- }
-
- u64 value;
- glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
- if (dependency) {
- value += dependency->Query();
+void HostCounter::EndQuery() {
+ if (!cache.AnyCommandQueued()) {
+ // A query may end up being waited on without any command (glDraw, glClear, glDispatch) queued.
+ // Waiting in that situation causes a lock. glFlush is considered a command, so insert one
+ // into the OpenGL command stream to make the wait safe.
+ glFlush();
}
-
- return *(result = value);
+ glEndQuery(GetTarget(type)); // Ends the query on this counter's target.
}
-bool HostCounter::WaitPending() const noexcept {
- return result.has_value();
+u64 HostCounter::BlockingQuery() const { // Reads this counter's own result from OpenGL; dependencies are added by the base class.
+ GLint64 value;
+ glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); // GL_QUERY_RESULT waits until the result is available.
+ return static_cast<u64>(value);
}
-CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
- : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
+ : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} // The cache is stored as a pointer so the object stays move-assignable.
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
- : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr},
- counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
-
-CachedQuery::~CachedQuery() = default;
+ : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} // rhs.cache and rhs.type are read after std::move(rhs); the base move constructor only touches base members.
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+ VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); // Moves the base members (addresses, counter, timestamp).
+ cache = rhs.cache; // Read after std::move(rhs); the base assignment does not touch derived members.
type = rhs.type;
- cpu_addr = rhs.cpu_addr;
- host_ptr = rhs.host_ptr;
- counter = std::move(rhs.counter);
- timestamp = rhs.timestamp;
return *this;
}
void CachedQuery::Flush() {
- // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero
- // in these cases.
- const u64 value = counter ? counter->Query() : 0;
- std::memcpy(host_ptr, &value, sizeof(u64));
-
- if (timestamp) {
- std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
- }
-}
-
-void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
- if (counter) {
- // If there's an old counter set it means the query is being rewritten by the game.
- // To avoid losing the data forever, flush here.
- Flush();
+ // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
+ // To avoid this, disable the stream and re-enable it afterwards, keeping the dependency chain.
+ // This is only needed when there are pending waits to be done.
+ auto& stream = cache->Stream(type);
+ const bool slice_counter = WaitPending() && stream.IsEnabled();
+ if (slice_counter) {
+ stream.Update(false); // Ends the running counter; the stream keeps it as its last counter.
}
- counter = std::move(counter_);
- timestamp = timestamp_;
-}
-
-bool CachedQuery::WaitPending() const noexcept {
- return counter && counter->WaitPending();
-}
-QueryType CachedQuery::GetType() const noexcept {
- return type;
-}
+ VideoCommon::CachedQueryBase<HostCounter>::Flush(); // Writes the counter value (and timestamp, if any) to host memory.
-VAddr CachedQuery::GetCpuAddr() const noexcept {
- return cpu_addr;
-}
-
-CacheAddr CachedQuery::GetCacheAddr() const noexcept {
- return ToCacheAddr(host_ptr);
-}
-
-u64 CachedQuery::GetSizeInBytes() const noexcept {
- return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+ if (slice_counter) {
+ stream.Update(true); // Starts a new counter that depends on the one ended above.
+ }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d9f22b44d..20d337f15 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -13,6 +13,7 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "video_core/query_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -24,134 +25,57 @@ namespace OpenGL {
class CachedQuery;
class HostCounter;
-class RasterizerOpenGL;
class QueryCache;
+class RasterizerOpenGL;
-class CounterStream final {
-public:
- explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
- ~CounterStream();
-
- void Update(bool enabled, bool any_command_queued);
-
- void Reset(bool any_command_queued);
-
- std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
-
- bool IsEnabled() const {
- return current != nullptr;
- }
-
-private:
- void Enable();
-
- void Disable(bool any_command_queued);
-
- void EndQuery(bool any_command_queued);
+using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
- QueryCache& cache;
-
- std::shared_ptr<HostCounter> current;
- std::shared_ptr<HostCounter> last;
- VideoCore::QueryType type;
- GLenum target;
-};
-
-class QueryCache final {
+class QueryCache final
+ : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
~QueryCache();
- void InvalidateRegion(CacheAddr addr, std::size_t size);
-
- void FlushRegion(CacheAddr addr, std::size_t size);
-
- void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
-
- void UpdateCounters();
-
- void ResetCounter(VideoCore::QueryType type);
+ OGLQuery AllocateQuery(VideoCore::QueryType type); ///< Returns a query object, reusing a reserved one when possible.
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
- std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type);
+ bool AnyCommandQueued() const noexcept; ///< Forwards to RasterizerOpenGL::AnyCommandQueued.
private:
- CachedQuery& Register(CachedQuery&& cached_query);
-
- CachedQuery* TryGet(CacheAddr addr);
-
- void Flush(CachedQuery& cached_query);
-
- CounterStream& GetStream(VideoCore::QueryType type);
-
- Core::System& system;
- RasterizerOpenGL& rasterizer;
-
- std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
-
- std::array<CounterStream, VideoCore::NumQueryTypes> streams;
- std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
+ RasterizerOpenGL& gl_rasterizer; ///< Concrete OpenGL rasterizer.
+ std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve; ///< Reusable query objects per query type.
};
-class HostCounter final {
+class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type, OGLQuery&& query);
+ VideoCore::QueryType type); // The query object is now allocated from the cache by the constructor.
~HostCounter();
- /// Returns the current value of the query.
- u64 Query();
-
- /// Returns true when querying this counter will potentially wait for OpenGL.
- bool WaitPending() const noexcept;
+ void EndQuery(); ///< Ends the OpenGL query started by the constructor.
private:
+ u64 BlockingQuery() const override; ///< Reads the query result from OpenGL.
+
QueryCache& cache;
VideoCore::QueryType type;
-
- std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
- OGLQuery query; ///< OpenGL query.
- std::optional<u64> result; ///< Added values of the counter.
+ OGLQuery query; ///< OpenGL query.
};
-class CachedQuery final {
+class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
- explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
- CachedQuery(CachedQuery&&) noexcept;
- CachedQuery(const CachedQuery&) = delete;
- ~CachedQuery();
-
- CachedQuery& operator=(CachedQuery&&) noexcept;
-
- /// Writes the counter value to host memory.
- void Flush();
-
- /// Updates the counter this cached query registered in guest memory will write when requested.
- void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
+ explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
+ u8* host_ptr);
+ CachedQuery(CachedQuery&& rhs) noexcept;
- /// Returns true when a flushing this query will potentially wait for OpenGL.
- bool WaitPending() const noexcept;
+ CachedQuery& operator=(CachedQuery&& rhs) noexcept;
- /// Returns the query type.
- VideoCore::QueryType GetType() const noexcept;
-
- /// Returns the guest CPU address for this query.
- VAddr GetCpuAddr() const noexcept;
-
- /// Returns the cache address for this query.
- CacheAddr GetCacheAddr() const noexcept;
-
- /// Returns the number of cached bytes.
- u64 GetSizeInBytes() const noexcept;
+ void Flush() override; ///< Flushes to guest memory, slicing the counter stream around the wait when needed.
private:
- VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed).
- VAddr cpu_addr; ///< Guest CPU address.
- u8* host_ptr; ///< Writable host pointer.
- std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
- std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
+ QueryCache* cache; ///< Owning cache, used to reach the counter stream of `type`.
+ VideoCore::QueryType type; ///< Query type, selects the counter stream.
};
} // namespace OpenGL