summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 41
-rw-r--r-- src/video_core/engines/maxwell_3d.h 4
-rw-r--r-- src/video_core/rasterizer_interface.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.cpp 201
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.h 123
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 30
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 10
7 files changed, 328 insertions, 86 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index fe91ff6a0..9add2bc94 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -556,23 +556,13 @@ void Maxwell3D::ProcessQueryGet() {
// matches the current payload.
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
break;
- case Regs::QueryOperation::Counter: {
- u64 result;
- switch (regs.query.query_get.select) {
- case Regs::QuerySelect::Zero:
- result = 0;
- break;
- case Regs::QuerySelect::SamplesPassed:
- result = rasterizer.Query(VideoCore::QueryType::SamplesPassed);
- break;
- default:
- result = 1;
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
+ case Regs::QueryOperation::Counter:
+ if (const std::optional<u64> result = GetQueryResult()) {
+ // If the query returns an empty optional it means it's cached and deferred.
+ // In this case we have a non-empty result, so we stamp it immediately.
+ StampQueryResult(*result, regs.query.query_get.short_query == 0);
}
- StampQueryResult(result, regs.query.query_get.short_query == 0);
break;
- }
case Regs::QueryOperation::Trap:
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
break;
@@ -595,20 +585,20 @@ void Maxwell3D::ProcessQueryCondition() {
}
case Regs::ConditionMode::ResNonZero: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
break;
}
case Regs::ConditionMode::Equal: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on =
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
break;
}
case Regs::ConditionMode::NotEqual: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on =
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
break;
@@ -674,6 +664,21 @@ void Maxwell3D::DrawArrays() {
}
}
+// Resolves the counter selected by regs.query.query_get.select.
+// Returns the value when it is known immediately, or an empty optional when the request has been
+// handed to the rasterizer together with the query address and no value is available here.
+std::optional<u64> Maxwell3D::GetQueryResult() {
+ switch (regs.query.query_get.select) {
+ case Regs::QuerySelect::Zero:
+ return 0;
+ case Regs::QuerySelect::SamplesPassed:
+ // Deferred: the rasterizer is given the query address and nothing is returned from here.
+ rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed);
+ return {};
+ default:
+ // Unhandled selects are reported and fall back to a result of 1.
+ UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
+ return 1;
+ }
+}
+
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[stage_index];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d21f678ed..26939be3f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
#include <array>
#include <bitset>
+#include <optional>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -1462,6 +1463,9 @@ private:
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
+
+ /// Returns a query's value or an empty object if the value will be deferred through a cache.
+ std::optional<u64> GetQueryResult();
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 2fc627539..a394f2d3e 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -20,6 +20,7 @@ namespace VideoCore {
enum class QueryType {
SamplesPassed,
};
+constexpr std::size_t NumQueryTypes = 1;
enum class LoadCallbackStage {
Prepare,
@@ -48,8 +49,8 @@ public:
/// Resets the counter of a query
virtual void ResetCounter(QueryType type) = 0;
- /// Returns the value of a GPU query
- virtual u64 Query(QueryType type) = 0;
+ /// Records a GPU query and caches it
+ virtual void Query(GPUVAddr gpu_addr, QueryType type) = 0;
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 1c7dc999a..8f0e8241d 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -2,58 +2,203 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
+#include <memory>
+#include <utility>
+#include <vector>
+
#include <glad/glad.h>
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
-HostCounter::HostCounter(GLenum target) {
- query.Create(target);
+using VideoCore::QueryType;
+
+namespace {
+
+constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
+
+constexpr GLenum GetTarget(QueryType type) {
+ return QueryTargets[static_cast<std::size_t>(type)];
}
-HostCounter::~HostCounter() = default;
+} // Anonymous namespace
+
+CounterStream::CounterStream(QueryCache& cache, QueryType type)
+ : cache{cache}, type{type}, target{GetTarget(type)} {}
-void HostCounter::UpdateState(bool enabled) {
+CounterStream::~CounterStream() = default;
+
+void CounterStream::Update(bool enabled, bool any_command_queued) {
if (enabled) {
- Enable();
- } else {
- Disable();
+ if (!current) {
+ current = cache.GetHostCounter(last, type);
+ }
+ return;
}
+
+ if (current) {
+ EndQuery(any_command_queued);
+ }
+ last = std::exchange(current, nullptr);
}
-void HostCounter::Reset() {
- counter = 0;
- Disable();
+void CounterStream::Reset(bool any_command_queued) {
+ if (current) {
+ EndQuery(any_command_queued);
+ }
+ current = nullptr;
+ last = nullptr;
}
-u64 HostCounter::Query() {
- if (!is_beginned) {
- return counter;
+// Ends the active host counter and returns it, then starts a fresh counter that keeps the
+// returned one as its dependency.
+// Returns nullptr when no counter is active; callers must be prepared for that.
+std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
+ if (!current) {
+ return nullptr;
+ }
+ // The running query has to be ended before the replacement counter begins one on this target.
+ EndQuery(any_command_queued);
+ last = std::move(current);
+ // The new counter chains to the one just ended through its dependency.
+ current = cache.GetHostCounter(last, type);
+ return last;
+}
+
+// Ends the OpenGL query on this stream's target, flushing first when no command has been queued.
+void CounterStream::EndQuery(bool any_command_queued) {
+ if (!any_command_queued) {
+ // A query may end up being waited on without any command (glDraw, glClear, glDispatch)
+ // having been queued, and waiting in that state causes a lock. glFlush is considered a
+ // command, so insert one into the OpenGL command stream; it is then safe to wait on the
+ // query.
+ glFlush();
+ }
+ glEndQuery(target);
+}
+
+QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
+ : RasterizerCache{rasterizer}, system{system},
+ rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {}
+
+QueryCache::~QueryCache() = default;
+
+// Records a query of the given type for gpu_addr: finds or creates the cached query object for
+// that address and binds the stream's current host counter to it.
+void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) {
+ auto& memory_manager = system.GPU().MemoryManager();
+ const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+
+ // Cached queries are looked up by host pointer.
+ auto query = TryGet(host_ptr);
+ if (!query) {
+ const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ // Without a CPU mapping for gpu_addr nothing is registered and the query is dropped.
+ ASSERT_OR_EXECUTE(cpu_addr, return;);
+
+ query = std::make_shared<CachedQuery>(type, *cpu_addr, host_ptr);
+ Register(query);
+ }
+
+ // GetCurrent() returns nullptr when the stream has no active counter, so the bound counter
+ // may be null.
+ query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()));
+ // NOTE(review): presumably flags the object so a later flush writes the value back to guest
+ // memory - confirm against RasterizerCacheObject.
+ query->MarkAsModified(true, *this);
+}
+
+// Enables or disables the samples-passed stream according to the samplecnt_enable register.
+void QueryCache::UpdateCounters() {
+ auto& samples_passed = GetStream(QueryType::SamplesPassed);
+
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
+}
+
+// Resets the stream of the given query type, forwarding whether any command is currently queued.
+void QueryCache::ResetCounter(QueryType type) {
+ GetStream(type).Reset(rasterizer.AnyCommandQueued());
+}
+
+// Stores an OpenGL query object in the per-type reserve so GetHostCounter() can reuse it instead
+// of creating a new one.
+void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
+ reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
+}
+
+std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
+ QueryType type) {
+ const auto type_index = static_cast<std::size_t>(type);
+ auto& reserve = reserved_queries[type_index];
+
+ if (reserve.empty()) {
+ return std::make_shared<HostCounter>(*this, std::move(dependency), type);
}
- Disable();
- u64 value;
- glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
- Enable();
- counter += value;
+ auto counter = std::make_shared<HostCounter>(*this, std::move(dependency), type,
+ std::move(reserve.back()));
+ reserve.pop_back();
return counter;
}
-void HostCounter::Enable() {
- if (is_beginned) {
- return;
+void QueryCache::FlushObjectInner(const std::shared_ptr<CachedQuery>& counter_) {
+ auto& counter = *counter_;
+ auto& stream = GetStream(counter.GetType());
+
+ // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
+ // To avoid this disable and re-enable keeping the dependency stream.
+ const bool is_enabled = stream.IsEnabled();
+ if (is_enabled) {
+ stream.Update(false, false);
+ }
+
+ counter.Flush();
+
+ if (is_enabled) {
+ stream.Update(true, false);
}
- is_beginned = true;
- glBeginQuery(GL_SAMPLES_PASSED, query.handle);
}
-void HostCounter::Disable() {
- if (!is_beginned) {
- return;
+CounterStream& QueryCache::GetStream(QueryType type) {
+ return streams[static_cast<std::size_t>(type)];
+}
+
+// Creates a new OpenGL query object for the type's target and begins it immediately.
+// `dependency` is the counter queued before this one (may be null).
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type)
+ : cache{cache}, type{type}, dependency{std::move(dependency)} {
+ const GLenum target = GetTarget(type);
+ query.Create(target);
+ glBeginQuery(target, query.handle);
+}
+
+// Takes ownership of an already created OpenGL query object (query_) and begins it immediately.
+// `dependency` is the counter queued before this one (may be null).
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
+ OGLQuery&& query_)
+ : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
+ glBeginQuery(GetTarget(type), query.handle);
+}
+
+// Hands the OpenGL query object back to the cache's reserve instead of letting it be destroyed
+// with this counter.
+HostCounter::~HostCounter() {
+ cache.Reserve(type, std::move(query));
+}
+
+// Returns this counter's value plus the values of every counter it depends on.
+u64 HostCounter::Query() {
+ // Without an OpenGL query object there is nothing to read; return the stored result as is.
+ if (query.handle == 0) {
+ return result;
+ }
+
+ // Overwrites `result` with this query's own value. Per the OpenGL reference, GL_QUERY_RESULT
+ // waits until the result is available.
+ glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result);
+
+ // Accumulate the counters queued before this one by walking the dependency chain recursively.
+ if (dependency) {
+ result += dependency->Query();
}
- glEndQuery(GL_SAMPLES_PASSED);
- is_beginned = false;
+
+ return result;
+}
+
+CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
+ : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+
+CachedQuery::~CachedQuery() = default;
+
+// Writes the bound counter's value to host memory.
+void CachedQuery::Flush() {
+ // The counter is null when the query was recorded while its stream had no active host
+ // counter (CounterStream::GetCurrent() returned nullptr). Write zero in that case instead of
+ // dereferencing a null pointer.
+ const u64 value = counter ? counter->Query() : 0;
+ std::memcpy(host_ptr, &value, sizeof(value));
+}
+
+void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_) {
+ counter = std::move(counter_);
+}
+
+QueryType CachedQuery::GetType() const {
+ return type;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 52c6546bf..91594b120 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -4,38 +4,131 @@
#pragma once
+#include <array>
+#include <memory>
+#include <optional>
+#include <vector>
+
#include <glad/glad.h>
#include "common/common_types.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+namespace Core {
+class System;
+}
+
namespace OpenGL {
-class HostCounter final {
+class CachedQuery;
+class HostCounter;
+class RasterizerOpenGL;
+class QueryCache;
+
+class CounterStream final {
public:
- explicit HostCounter(GLenum target);
- ~HostCounter();
+ explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
+ ~CounterStream();
+
+ void Update(bool enabled, bool any_command_queued);
+
+ void Reset(bool any_command_queued);
+
+ std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
+
+ bool IsEnabled() const {
+ return current != nullptr;
+ }
+
+private:
+ void EndQuery(bool any_command_queued);
+
+ QueryCache& cache;
+
+ std::shared_ptr<HostCounter> current;
+ std::shared_ptr<HostCounter> last;
+ VideoCore::QueryType type;
+ GLenum target;
+};
+
+class QueryCache final : public RasterizerCache<std::shared_ptr<CachedQuery>> {
+public:
+ explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+ ~QueryCache();
+
+ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type);
+
+ void UpdateCounters();
+
+ void ResetCounter(VideoCore::QueryType type);
- /// Enables or disables the counter as required.
- void UpdateState(bool enabled);
+ void Reserve(VideoCore::QueryType type, OGLQuery&& query);
- /// Resets the counter disabling it if needed.
- void Reset();
+ std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type);
+
+protected:
+ void FlushObjectInner(const std::shared_ptr<CachedQuery>& counter) override;
+
+private:
+ CounterStream& GetStream(VideoCore::QueryType type);
+
+ Core::System& system;
+ RasterizerOpenGL& rasterizer;
+
+ std::array<CounterStream, VideoCore::NumQueryTypes> streams;
+ std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
+};
+
+/// Wraps one OpenGL query object. Counters form a chain through `dependency`; Query() adds the
+/// value of the counter queued before this one to its own.
+class HostCounter final {
+public:
+ explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type);
+ explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+ VideoCore::QueryType type, OGLQuery&& query);
+ ~HostCounter();
/// Returns the current value of the query.
- /// @note It may harm precision of future queries if the counter is not disabled.
u64 Query();
private:
- /// Enables the counter when disabled.
- void Enable();
+ QueryCache& cache;
+ VideoCore::QueryType type;
- /// Disables the counter when enabled.
- void Disable();
+ std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
+ OGLQuery query; ///< OpenGL query.
+ // Value-initialized because Query() returns it untouched when there is no query handle.
+ u64 result{}; ///< Added values of the counter.
+};
+
+class CachedQuery final : public RasterizerCacheObject {
+public:
+ explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
+ ~CachedQuery();
+
+ /// Writes the counter value to host memory.
+ void Flush();
+
+ /// Updates the counter this cached query registered in guest memory will write when requested.
+ void SetCounter(std::shared_ptr<HostCounter> counter);
- OGLQuery query; ///< OpenGL query.
- u64 counter{}; ///< Added values of the counter.
- bool is_beginned{}; ///< True when the OpenGL query is beginned.
+ /// Returns the query type.
+ VideoCore::QueryType GetType() const;
+
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
+ }
+
+ std::size_t GetSizeInBytes() const override {
+ return sizeof(u64);
+ }
+
+private:
+ VideoCore::QueryType type;
+ VAddr cpu_addr; ///< Guest CPU address.
+ u8* host_ptr; ///< Writable host pointer.
+ std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 652db705b..827f85884 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,6 +25,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
- shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
- buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+ shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
+ screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -548,9 +549,9 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
-
const auto& regs = gpu.regs;
- samples_passed.UpdateState(regs.samplecnt_enable);
+
+ query_cache.UpdateCounters();
SyncRasterizeEnable(state);
SyncColorMask();
@@ -718,24 +719,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
- switch (type) {
- case VideoCore::QueryType::SamplesPassed:
- samples_passed.Reset();
- break;
- default:
- UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
- break;
- }
+ query_cache.ResetCounter(type);
}
-u64 RasterizerOpenGL::Query(VideoCore::QueryType type) {
- switch (type) {
- case VideoCore::QueryType::SamplesPassed:
- return samples_passed.Query();
- default:
- UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
- return 1;
- }
+void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) {
+ query_cache.Query(gpu_addr, type);
}
void RasterizerOpenGL::FlushAll() {}
@@ -747,6 +735,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
}
texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size);
+ query_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -757,6 +746,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
+ query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 857a6c073..4fb6811a7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -63,7 +63,7 @@ public:
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
- u64 Query(VideoCore::QueryType type) override;
+ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -78,6 +78,11 @@ public:
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
+ /// Returns true when there are commands queued to the OpenGL server.
+ bool AnyCommandQueued() const {
+ return num_queued_commands > 0;
+ }
+
private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
@@ -207,6 +212,7 @@ private:
ShaderCacheOpenGL shader_cache;
SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache;
+ QueryCache query_cache;
Core::System& system;
ScreenInfo& screen_info;
@@ -224,8 +230,6 @@ private:
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
- HostCounter samples_passed{GL_SAMPLES_PASSED};
-
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;
};