From 1dd754590fb9850bf00ddacbb860076dbbacabc6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 26 Jun 2018 16:14:14 -0400 Subject: gl_rasterizer_cache: Implement caching for texture and framebuffer surfaces. gl_rasterizer_cache: Improved cache management based on Citra's implementation. gl_surface_cache: Add some docstrings. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 25 +++-- .../renderer_opengl/gl_rasterizer_cache.cpp | 116 +++++++++++++++++++-- .../renderer_opengl/gl_rasterizer_cache.h | 43 +++++++- 3 files changed, 168 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f9b0ce434..62ee45a36 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() { // Mark framebuffer surfaces as dirty if (color_surface != nullptr && write_color_fb) { - res_cache.FlushSurface(color_surface); + res_cache.MarkSurfaceAsDirty(color_surface); } if (depth_surface != nullptr && write_depth_fb) { - res_cache.FlushSurface(depth_surface); + res_cache.MarkSurfaceAsDirty(depth_surface); } } void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} -void RasterizerOpenGL::FlushAll() {} +void RasterizerOpenGL::FlushAll() { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); +} -void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {} +void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); +} -void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} +void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size); +} -void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, 
u64 size) {} +void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size); +} bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index bd35bdb02..71ad7be74 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/hle/kernel/process.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/textures/astc.h" @@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { +CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { texture.Create(); const auto& rect{params.GetRect()}; AllocateSurfaceTexture(texture.handle, @@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { draw_framebuffer.Create(); } +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { + while (!surface_cache.empty()) { + UnregisterSurface(surface_cache.begin()->second); + } +} + Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { return GetSurface(SurfaceParams::CreateForTexture(config)); } @@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); } -void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { - surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); - 
surface->FlushGLBuffer(); +void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { + if (Settings::values.use_accurate_framebuffers) { + // If enabled, always flush dirty surfaces + surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->FlushGLBuffer(); + } else { + // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads + // and flushes are very slow and do not seem to improve accuracy + const auto& params{surface->GetSurfaceParams()}; + Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false); + } } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { @@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { Surface surface; if (search != surface_cache.end()) { surface = search->second; + if (Settings::values.use_accurate_framebuffers) { + // Reload the surface from Switch memory + LoadSurface(surface); + } } else { surface = std::make_shared(params); - surface_cache[surface_key] = surface; + RegisterSurface(surface); + LoadSurface(surface); } - LoadSurface(surface); - return surface; } @@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { return surfaces[0]; } + +void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { + // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should + // probably implement this in the future, but for now, the `use_accurate_framebuffers` setting + // can be used to always flush. 
+} + +void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { + for (const auto& pair : surface_cache) { + const auto& surface{pair.second}; + const auto& params{surface->GetSurfaceParams()}; + + if (params.IsOverlappingRegion(addr, size)) { + UnregisterSurface(surface); + } + } +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; + + if (search != surface_cache.end()) { + // Registered already + return; + } + + surface_cache[surface_key] = surface; + UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; + + if (search == surface_cache.end()) { + // Unregistered already + return; + } + + UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); + surface_cache.erase(search); +} + +template +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { + const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - + (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; + const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; + const u64 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + 
const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) + << Tegra::MemoryManager::PAGE_BITS; + const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) + << Tegra::MemoryManager::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 84bdec652..85e7c8888 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -8,7 +8,7 @@ #include #include #include - +#include #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" @@ -19,6 +19,7 @@ class CachedSurface; using Surface = std::shared_ptr; using SurfaceSurfaceRect_Tuple = std::tuple>; +using PageMap = boost::icl::interval_map; struct SurfaceParams { enum class PixelFormat { @@ -243,8 +244,10 @@ struct SurfaceParams { return SurfaceType::Invalid; } + /// Returns the rectangle corresponding to this surface MathUtil::Rectangle GetRect() const; + /// Returns the size of this surface in bytes, adjusted for compression size_t SizeInBytes() const { const u32 compression_factor{GetCompressionFactor(pixel_format)}; ASSERT(width % compression_factor == 0); @@ -253,10 +256,18 @@ struct SurfaceParams { GetFormatBpp(pixel_format) / CHAR_BIT; } + /// Returns the CPU virtual address for this surface VAddr GetCpuAddr() const; + /// Returns true if the specified region overlaps with this surface's region in Switch memory + bool 
IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { + return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); + } + + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); + /// Creates SurfaceParams from a framebuffer configuration static SurfaceParams CreateForFramebuffer( const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); @@ -272,6 +283,7 @@ struct SurfaceParams { size_t size_in_bytes; }; +/// Hashable variation of SurfaceParams, used for a key in the surface cache struct SurfaceKey : Common::HashableStruct { static SurfaceKey Create(const SurfaceParams& params) { SurfaceKey res; @@ -325,18 +337,43 @@ private: class RasterizerCacheOpenGL final : NonCopyable { public: RasterizerCacheOpenGL(); + ~RasterizerCacheOpenGL(); + /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); + + /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport); - void LoadSurface(const Surface& surface); - void FlushSurface(const Surface& surface); + + /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory + void MarkSurfaceAsDirty(const Surface& surface); + + /// Tries to find a framebuffer GPU address based on the provided CPU address Surface TryFindFramebufferSurface(VAddr cpu_addr) const; + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(Tegra::GPUVAddr addr, size_t size); + + /// Mark the specified region as being invalidated + void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); + private: + void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params); + /// Register surface into the cache + void 
RegisterSurface(const Surface& surface); + + /// Remove surface from the cache + void UnregisterSurface(const Surface& surface); + + /// Increase/decrease the number of surfaces in pages touching the specified region + void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); + std::unordered_map surface_cache; + PageMap cached_pages; + OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; }; -- cgit v1.2.3