From 76ca2a5f82f4df64cb839af42c93acb6705411ae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 2 Nov 2019 04:08:31 -0300 Subject: gl_rasterizer: Upload constant buffers with glNamedBufferSubData Nvidia's OpenGL driver maps gl(Named)BufferSubData with some requirements to a fast path. This path has an extra memcpy but updates the buffer without orphaning or waiting for previous calls. It can be seen as a better model for "push constants" that can upload a whole UBO instead of 256 bytes. This path has some requirements established here: http://on-demand.gputechconf.com/gtc/2014/presentations/S4379-opengl-44-scene-rendering-techniques.pdf#page=24 Instead of using the stream buffer, this commit moves constant buffer uploads to calls of glNamedBufferSubData and from my testing it brings a performance improvement. This is disabled when the vendor is not Nvidia since it brings performance regressions. --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 31 +++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_buffer_cache.cpp') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index f8a807c84..0375fca17 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -8,13 +8,17 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const 
std::size_t size) @@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz CachedBufferBlock::~CachedBufferBlock() = default; OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - std::size_t stream_size) - : VideoCommon::BufferCache{ - rasterizer, system, std::make_unique(stream_size, true)} {} + const Device& device, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, std::make_unique(stream_size, true)} { + if (!device.HasFastBufferSubData()) { + return; + } + + static constexpr auto size = static_cast(Maxwell::MaxConstBufferSize); + glCreateBuffers(static_cast(std::size(cbufs)), std::data(cbufs)); + for (const GLuint cbuf : cbufs) { + glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); + } +} -OGLBufferCache::~OGLBufferCache() = default; +OGLBufferCache::~OGLBufferCache() { + glDeleteBuffers(static_cast(std::size(cbufs)), std::data(cbufs)); +} Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { return std::make_shared(cache_addr, size); @@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t static_cast(size)); } +OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, + std::size_t size) { + DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); + const GLuint& cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast(size), raw_pointer); + return {&cbuf, 0}; +} + } // namespace OpenGL -- cgit v1.2.3