diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.cpp | 201 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.h | 123 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 30 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 10 |
4 files changed, 298 insertions, 66 deletions
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 1c7dc999a..8f0e8241d 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -2,58 +2,203 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> +#include <memory> +#include <utility> +#include <vector> + #include <glad/glad.h> +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_query_cache.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" namespace OpenGL { -HostCounter::HostCounter(GLenum target) { - query.Create(target); +using VideoCore::QueryType; + +namespace { + +constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; + +constexpr GLenum GetTarget(QueryType type) { + return QueryTargets[static_cast<std::size_t>(type)]; } -HostCounter::~HostCounter() = default; +} // Anonymous namespace + +CounterStream::CounterStream(QueryCache& cache, QueryType type) + : cache{cache}, type{type}, target{GetTarget(type)} {} -void HostCounter::UpdateState(bool enabled) { +CounterStream::~CounterStream() = default; + +void CounterStream::Update(bool enabled, bool any_command_queued) { if (enabled) { - Enable(); - } else { - Disable(); + if (!current) { + current = cache.GetHostCounter(last, type); + } + return; } + + if (current) { + EndQuery(any_command_queued); + } + last = std::exchange(current, nullptr); } -void HostCounter::Reset() { - counter = 0; - Disable(); +void CounterStream::Reset(bool any_command_queued) { + if (current) { + EndQuery(any_command_queued); + } + current = nullptr; + last = nullptr; } -u64 HostCounter::Query() { - if (!is_beginned) { - return counter; +std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) { + if (!current) { + return nullptr; + } + EndQuery(any_command_queued); + last = std::move(current); + current = cache.GetHostCounter(last, type); + return last; +} + +void CounterStream::EndQuery(bool any_command_queued) { + if (!any_command_queued) { + // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not + // having any of these causes a lock. glFlush is considered a command, so we can safely wait + // for this. Insert to the OpenGL command stream a flush. + glFlush(); + } + glEndQuery(target); +} + +QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) + : RasterizerCache{rasterizer}, system{system}, + rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {} + +QueryCache::~QueryCache() = default; + +void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) { + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + auto query = TryGet(host_ptr); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = std::make_shared<CachedQuery>(type, *cpu_addr, host_ptr); + Register(query); + } + + query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued())); + query->MarkAsModified(true, *this); +} + +void QueryCache::UpdateCounters() { + auto& samples_passed = GetStream(QueryType::SamplesPassed); + + const auto& regs = system.GPU().Maxwell3D().regs; + samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued()); +} + +void QueryCache::ResetCounter(QueryType type) { + GetStream(type).Reset(rasterizer.AnyCommandQueued()); +} + +void QueryCache::Reserve(QueryType type, OGLQuery&& query) { + reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query)); +} + +std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency, + QueryType type) { + const auto type_index = static_cast<std::size_t>(type); + auto& reserve = reserved_queries[type_index]; + + if (reserve.empty()) { + return std::make_shared<HostCounter>(*this, std::move(dependency), type); } - Disable(); - u64 value; - glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); - Enable(); - counter += value; + auto counter = std::make_shared<HostCounter>(*this, std::move(dependency), type, + std::move(reserve.back())); + reserve.pop_back(); return counter; } -void HostCounter::Enable() { - if (is_beginned) { - return; +void QueryCache::FlushObjectInner(const std::shared_ptr<CachedQuery>& counter_) { + auto& counter = *counter_; + auto& stream = GetStream(counter.GetType()); + + // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. + // To avoid this disable and re-enable keeping the dependency stream. + const bool is_enabled = stream.IsEnabled(); + if (is_enabled) { + stream.Update(false, false); + } + + counter.Flush(); + + if (is_enabled) { + stream.Update(true, false); } - is_beginned = true; - glBeginQuery(GL_SAMPLES_PASSED, query.handle); } -void HostCounter::Disable() { - if (!is_beginned) { - return; +CounterStream& QueryCache::GetStream(QueryType type) { + return streams[static_cast<std::size_t>(type)]; +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type) + : cache{cache}, type{type}, dependency{std::move(dependency)} { + const GLenum target = GetTarget(type); + query.Create(target); + glBeginQuery(target, query.handle); +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type, + OGLQuery&& query_) + : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { + glBeginQuery(GetTarget(type), query.handle); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, std::move(query)); +} + +u64 HostCounter::Query() { + if (query.handle == 0) { + return result; + } + + glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result); + + if (dependency) { + result += dependency->Query(); } - glEndQuery(GL_SAMPLES_PASSED); - is_beginned = false; + + return result; +} + +CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + +CachedQuery::~CachedQuery() = default; + +void CachedQuery::Flush() { + const u64 value = counter->Query(); + std::memcpy(host_ptr, &value, sizeof(value)); +} + +void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_) { + counter = std::move(counter_); +} + +QueryType CachedQuery::GetType() const { + return type; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 52c6546bf..91594b120 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -4,38 +4,131 @@ #pragma once +#include <array> +#include <memory> +#include <optional> +#include <vector> + #include <glad/glad.h> #include "common/common_types.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +namespace Core { +class System; +} + namespace OpenGL { -class HostCounter final { +class CachedQuery; +class HostCounter; +class RasterizerOpenGL; +class QueryCache; + +class CounterStream final { public: - explicit HostCounter(GLenum target); - ~HostCounter(); + explicit CounterStream(QueryCache& cache, VideoCore::QueryType type); + ~CounterStream(); + + void Update(bool enabled, bool any_command_queued); + + void Reset(bool any_command_queued); + + std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued); + + bool IsEnabled() const { + return current != nullptr; + } + +private: + void EndQuery(bool any_command_queued); + + QueryCache& cache; + + std::shared_ptr<HostCounter> current; + std::shared_ptr<HostCounter> last; + VideoCore::QueryType type; + GLenum target; +}; + +class QueryCache final : public RasterizerCache<std::shared_ptr<CachedQuery>> { +public: + explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + ~QueryCache(); + + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type); + + void UpdateCounters(); + + void ResetCounter(VideoCore::QueryType type); - /// Enables or disables the counter as required. - void UpdateState(bool enabled); + void Reserve(VideoCore::QueryType type, OGLQuery&& query); - /// Resets the counter disabling it if needed. - void Reset(); + std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + +protected: + void FlushObjectInner(const std::shared_ptr<CachedQuery>& counter) override; + +private: + CounterStream& GetStream(VideoCore::QueryType type); + + Core::System& system; + RasterizerOpenGL& rasterizer; + + std::array<CounterStream, VideoCore::NumQueryTypes> streams; + std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries; +}; + +class HostCounter final { +public: + explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type, OGLQuery&& query); + ~HostCounter(); /// Returns the current value of the query. - /// @note It may harm precision of future queries if the counter is not disabled. u64 Query(); private: - /// Enables the counter when disabled. - void Enable(); + QueryCache& cache; + VideoCore::QueryType type; - /// Disables the counter when enabled. - void Disable(); + std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one. + OGLQuery query; ///< OpenGL query. + u64 result; ///< Added values of the counter. +}; + +class CachedQuery final : public RasterizerCacheObject { +public: + explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); + ~CachedQuery(); + + /// Writes the counter value to host memory. + void Flush(); + + /// Updates the counter this cached query registered in guest memory will write when requested. + void SetCounter(std::shared_ptr<HostCounter> counter); - OGLQuery query; ///< OpenGL query. - u64 counter{}; ///< Added values of the counter. - bool is_beginned{}; ///< True when the OpenGL query is beginned. + /// Returns the query type. + VideoCore::QueryType GetType() const; + + VAddr GetCpuAddr() const override { + return cpu_addr; + } + + std::size_t GetSizeInBytes() const override { + return sizeof(u64); + } + +private: + VideoCore::QueryType type; + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 652db705b..827f85884 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,6 +25,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { + shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, + screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -548,9 +549,9 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; - samples_passed.UpdateState(regs.samplecnt_enable); + + query_cache.UpdateCounters(); SyncRasterizeEnable(state); SyncColorMask(); @@ -718,24 +719,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - samples_passed.Reset(); - break; - default: - UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type)); - break; - } + query_cache.ResetCounter(type); } -u64 RasterizerOpenGL::Query(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - return samples_passed.Query(); - default: - UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type)); - return 1; - } +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) { + query_cache.Query(gpu_addr, type); } void RasterizerOpenGL::FlushAll() {} @@ -747,6 +735,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -757,6 +746,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 857a6c073..4fb6811a7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -63,7 +63,7 @@ public: void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; void ResetCounter(VideoCore::QueryType type) override; - u64 Query(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -78,6 +78,11 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + /// Returns true when there are commands queued to the OpenGL server. + bool AnyCommandQueued() const { + return num_queued_commands > 0; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -207,6 +212,7 @@ private: ShaderCacheOpenGL shader_cache; SamplerCacheOpenGL sampler_cache; FramebufferCacheOpenGL framebuffer_cache; + QueryCache query_cache; Core::System& system; ScreenInfo& screen_info; @@ -224,8 +230,6 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; - HostCounter samples_passed{GL_SAMPLES_PASSED}; - /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; }; |