diff options
author | bunnei <bunneidev@gmail.com> | 2018-06-29 20:07:28 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-29 20:07:28 +0200 |
commit | 50ef2beb5850362dda9c1d50531475f0c5b6e8df (patch) | |
tree | ea63b3666a58af16fe7985c28fccc7890c45f951 | |
parent | Merge pull request #588 from mailwl/hwopus (diff) | |
parent | gl_rasterizer_cache: Only dereference color_surface/depth_surface if valid. (diff) | |
download | yuzu-50ef2beb5850362dda9c1d50531475f0c5b6e8df.tar yuzu-50ef2beb5850362dda9c1d50531475f0c5b6e8df.tar.gz yuzu-50ef2beb5850362dda9c1d50531475f0c5b6e8df.tar.bz2 yuzu-50ef2beb5850362dda9c1d50531475f0c5b6e8df.tar.lz yuzu-50ef2beb5850362dda9c1d50531475f0c5b6e8df.tar.xz yuzu-50ef2beb5850362dda9c1d50531475f0c5b6e8df.tar.zst yuzu-50ef2beb5850362dda9c1d50531475f0c5b6e8df.zip |
-rw-r--r-- | src/core/settings.h | 1 | ||||
-rw-r--r-- | src/core/telemetry_session.cpp | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 36 | ||||
-rw-r--r-- | src/video_core/rasterizer_interface.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 107 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 1374 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 331 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 2 | ||||
-rw-r--r-- | src/yuzu/configuration/config.cpp | 3 | ||||
-rw-r--r-- | src/yuzu/configuration/configure_graphics.cpp | 2 | ||||
-rw-r--r-- | src/yuzu/configuration/configure_graphics.ui | 7 | ||||
-rw-r--r-- | src/yuzu_cmd/config.cpp | 2 | ||||
-rw-r--r-- | src/yuzu_cmd/default_ini.h | 4 |
15 files changed, 425 insertions, 1454 deletions
diff --git a/src/core/settings.h b/src/core/settings.h index a7f1e5fa0..7150d9755 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -129,6 +129,7 @@ struct Values { // Renderer float resolution_factor; bool toggle_framelimit; + bool use_accurate_framebuffers; float bg_red; float bg_green; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index a60aa1143..270d68222 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -161,6 +161,8 @@ TelemetrySession::TelemetrySession() { Settings::values.resolution_factor); AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit", Settings::values.toggle_framelimit); + AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateFramebuffers", + Settings::values.use_accurate_framebuffers); AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 2dc251205..180be4ff4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -321,6 +321,24 @@ public: INSERT_PADDING_WORDS(1); }; + struct RenderTargetConfig { + u32 address_high; + u32 address_low; + u32 width; + u32 height; + Tegra::RenderTargetFormat format; + u32 block_dimensions; + u32 array_mode; + u32 layer_stride; + u32 base_layer; + INSERT_PADDING_WORDS(7); + + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + }; + union { struct { INSERT_PADDING_WORDS(0x45); @@ -333,23 +351,7 @@ public: INSERT_PADDING_WORDS(0x1B8); - struct { - u32 address_high; - u32 address_low; - u32 width; - u32 height; - Tegra::RenderTargetFormat format; - u32 block_dimensions; - u32 array_mode; - u32 layer_stride; - u32 base_layer; - INSERT_PADDING_WORDS(7); - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); - } - } rt[NumRenderTargets]; + RenderTargetConfig rt[NumRenderTargets]; struct { f32 scale_x; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f0e48a802..145e58334 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -51,9 +51,8 @@ public: } /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, - VAddr framebuffer_addr, u32 pixel_stride, - ScreenInfo& screen_info) { + virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info) { return false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3fbf8e1f9..62ee45a36 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, u64 size = end - start + 1; // Copy vertex array data - res_cache.FlushRegion(start, size, nullptr); Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); // Bind the vertex array to the buffer at the current offset. @@ -325,29 +324,22 @@ void RasterizerOpenGL::DrawArrays() { std::tie(color_surface, depth_surface, surfaces_rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); - const u16 res_scale = color_surface != nullptr - ? color_surface->res_scale - : (depth_surface == nullptr ? 1u : depth_surface->res_scale); - MathUtil::Rectangle<u32> draw_rect{ + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right, + surfaces_rect.left, surfaces_rect.right)), // Right static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + - viewport_rect.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); // Sync the viewport - SyncViewport(surfaces_rect, res_scale); + SyncViewport(surfaces_rect); // Sync the blend state registers SyncBlendState(); @@ -442,19 +434,11 @@ void RasterizerOpenGL::DrawArrays() { state.Apply(); // Mark framebuffer surfaces as dirty - MathUtil::Rectangle<u32> draw_rect_unscaled{ - draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, - draw_rect.bottom / res_scale}; - if (color_surface != nullptr && write_color_fb) { - auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - color_surface); + res_cache.MarkSurfaceAsDirty(color_surface); } if (depth_surface != nullptr && write_depth_fb) { - auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - depth_surface); + res_cache.MarkSurfaceAsDirty(depth_surface); } } @@ -462,7 +446,7 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} void RasterizerOpenGL::FlushAll() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushAll(); + res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { @@ -472,13 +456,13 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.InvalidateRegion(addr, size, nullptr); + res_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); - res_cache.InvalidateRegion(addr, size, nullptr); + res_cache.InvalidateRegion(addr, size); } bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { @@ -497,45 +481,28 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { return true; } -bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, +bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { - if (framebuffer_addr == 0) { - return false; + if (!framebuffer_addr) { + return {}; } + MICROPROFILE_SCOPE(OpenGL_CacheManagement); - SurfaceParams src_params; - src_params.cpu_addr = framebuffer_addr; - src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); - src_params.width = std::min(framebuffer.width, pixel_stride); - src_params.height = framebuffer.height; - src_params.stride = pixel_stride; - src_params.is_tiled = true; - src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; - src_params.pixel_format = - SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); - src_params.component_type = - SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); - src_params.UpdateParams(); - - MathUtil::Rectangle<u32> src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - - if (src_surface == nullptr) { - return false; + const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; + if (!surface) { + return {}; } - u32 scaled_width = src_surface->GetScaledWidth(); - u32 scaled_height = src_surface->GetScaledHeight(); - - screen_info.display_texcoords = MathUtil::Rectangle<float>( - (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, - (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + // Verify that the cached surface is the same size and format as the requested framebuffer + const auto& params{surface->GetSurfaceParams()}; + const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)}; + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); - screen_info.display_texture = src_surface->texture.handle; + screen_info.display_texture = surface->Texture().handle; return true; } @@ -674,7 +641,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { - state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; + state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle; state.texture_units[current_bindpoint].swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); state.texture_units[current_bindpoint].swizzle.g = @@ -700,16 +667,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, state.Apply(); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->texture.handle : 0, 0); + color_surface != nullptr ? color_surface->Texture().handle : 0, 0); if (depth_surface != nullptr) { if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); } else { // attach depth glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); // clear stencil attachment glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); } @@ -720,14 +687,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, } } -void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; - state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; - state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); - state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); + state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left; + state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom; + state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); + state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight()); } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4762983c9..621200f03 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -109,7 +109,7 @@ private: u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); /// Syncs the viewport to match the guest state - void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); + void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 857164ff6..63f5999ea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1,37 +1,23 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <algorithm> -#include <atomic> -#include <cstring> -#include <iterator> -#include <memory> -#include <utility> -#include <vector> -#include <boost/optional.hpp> -#include <boost/range/iterator_range.hpp> #include <glad/glad.h> + #include "common/alignment.h" -#include "common/bit_field.h" -#include "common/color.h" -#include "common/logging/log.h" -#include "common/math_util.h" +#include "common/assert.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" #include "video_core/utils.h" -#include "video_core/video_core.h" using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; @@ -44,6 +30,40 @@ struct FormatTuple { bool compressed; }; +/*static*/ SurfaceParams SurfaceParams::CreateForTexture( + const Tegra::Texture::FullTextureInfo& config) { + + SurfaceParams params{}; + params.addr = config.tic.Address(); + params.is_tiled = config.tic.IsTiled(); + params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, + params.pixel_format = PixelFormatFromTextureFormat(config.tic.format); + params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); + params.type = GetFormatType(params.pixel_format); + params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); + params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.unaligned_height = config.tic.Height(); + params.size_in_bytes = params.SizeInBytes(); + return params; +} + +/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { + + SurfaceParams params{}; + params.addr = config.Address(); + params.is_tiled = true; + params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + params.size_in_bytes = params.SizeInBytes(); + return params; +} + static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5 @@ -63,8 +83,8 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::ColorTexture) { ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are - // type FLOAT + // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which + // are type FLOAT ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F || pixel_format == PixelFormat::R11FG11FB10F); return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; @@ -77,65 +97,70 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return {}; } -template <typename Map, typename Interval> -constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); +VAddr SurfaceParams::GetCpuAddr() const { + const auto& gpu = Core::System::GetInstance().GPU(); + return *gpu.memory_manager->GpuToCpuAddress(addr); } -static u16 GetResolutionScaleFactor() { - return static_cast<u16>(!Settings::values.resolution_factor - ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() - : Settings::values.resolution_factor); +static bool IsPixelFormatASTC(PixelFormat format) { + switch (format) { + case PixelFormat::ASTC_2D_4X4: + return true; + default: + return false; + } } -static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { - u32 block_width{}; - u32 block_height{}; - +static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { switch (format) { case PixelFormat::ASTC_2D_4X4: - block_width = 4; - block_height = 4; - break; + return {4, 4}; default: NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); UNREACHABLE(); } +} + +MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { + u32 actual_height{unaligned_height}; + if (IsPixelFormatASTC(pixel_format)) { + // ASTC formats must stop at the ATSC block size boundary + actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); + } + return {0, actual_height, width, 0}; +} +static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { + u32 block_width{}; + u32 block_height{}; + std::tie(block_width, block_height) = GetASTCBlockSize(format); data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height); } template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, - Tegra::GPUVAddr start, Tegra::GPUVAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { auto data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(base), + *gpu.memory_manager->GpuToCpuAddress(addr), SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - if (SurfaceParams::IsFormatASTC(format)) { - // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this - ConvertASTCToRGBA8(data, format, stride, height); - } - std::memcpy(gl_buffer, data.data(), data.size()); } else { - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check - // the configuration for this and perform more generic un/swizzle + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should + // check the configuration for this and perform more generic un/swizzle NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, morton_to_gl); } } -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, - Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, @@ -146,8 +171,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, - Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { MortonCopy<false, PixelFormat::ABGR8>, @@ -192,374 +216,79 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, - const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, - GLuint read_fb_handle, GLuint draw_fb_handle) { - - glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, - GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), - src_rect.GetHeight(), 0); - return true; -} - -static bool FillSurface(const Surface& surface, const u8* fill_data, - const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { - UNREACHABLE(); - return {}; -} - -SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { - SurfaceParams params = *this; - const u32 tiled_size = is_tiled ? 8 : 1; - const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - Tegra::GPUVAddr aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - Tegra::GPUVAddr aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); - - if (aligned_end - aligned_start > stride_tiled_bytes) { - params.addr = aligned_start; - params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride)); - } else { - // 1 row - ASSERT(aligned_end - aligned_start == stride_tiled_bytes); - const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); - aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); - aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); - params.addr = aligned_start; - params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size); - params.stride = params.width; - params.height = tiled_size; - } - params.UpdateParams(); - - return params; -} - -SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { - if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { - return {}; - } - - if (is_tiled) { - unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; - unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; - unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; - unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; - } - - const u32 stride_tiled = !is_tiled ? stride : stride * 8; - - const u32 pixel_offset = - stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + - unscaled_rect.left; - - const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); - - return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; -} - -MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { - const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr)); - - if (is_tiled) { - const int x0 = (begin_pixel_index % (stride * 8)) / 8; - const int y0 = (begin_pixel_index / (stride * 8)) * 8; - // Top to bottom - return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, - height - (y0 + sub_surface.height)); - } - - const int x0 = begin_pixel_index % stride; - const int y0 = begin_pixel_index / stride; - // Bottom to top - return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); -} - -MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { - auto rect = GetSubRect(sub_surface); - rect.left = rect.left * res_scale; - rect.right = rect.right * res_scale; - rect.top = rect.top * res_scale; - rect.bottom = rect.bottom * res_scale; - return rect; -} - -bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { - return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.block_height, other_surface.pixel_format, - other_surface.component_type, - other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, - pixel_format, component_type, is_tiled) && - pixel_format != PixelFormat::Invalid; -} - -bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { - return sub_surface.addr >= addr && sub_surface.end <= end && - sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && - sub_surface.component_type == component_type && - (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && - GetSubRect(sub_surface).left + sub_surface.width <= stride; -} - -bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { - return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && - addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && - component_type == expanded_surface.component_type && stride == expanded_surface.stride && - (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % - BytesInPixels(stride * (is_tiled ? 8 : 1)) == - 0; -} - -bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { - if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || - end < texcopy_params.end) { - return false; - } - if (texcopy_params.block_height != block_height || - texcopy_params.component_type != component_type) - return false; - - if (texcopy_params.width != texcopy_params.stride) { - const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); - return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && - ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; - } - return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); -} - -VAddr SurfaceParams::GetCpuAddr() const { - // When this function is used, only cpu_addr or (GPU) addr should be set, not both - ASSERT(!(cpu_addr && addr)); - const auto& gpu = Core::System::GetInstance().GPU(); - return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, - SurfaceInterval fill_interval) const { - if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && - boost::icl::first(fill_interval) >= addr && - boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range - dest_surface.FromInterval(fill_interval).GetInterval() == - fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); - std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], - dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) - return false; - } - return true; - } - return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, - SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - - if (CanFill(dest_surface, copy_interval)) - return true; - - return false; -} - -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { - SurfaceInterval result{}; - const auto valid_regions = - SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; - for (auto& valid_interval : valid_regions) { - const SurfaceInterval aligned_interval{ - addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1)), - addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1))}; - - if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || - boost::icl::length(aligned_interval) == 0) { - continue; - } - - // Get the rectangle within aligned_interval - const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1); - SurfaceInterval rect_interval{ - addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), - addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), - }; - if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { - // 1 row - rect_interval = aligned_interval; - } else if (boost::icl::length(rect_interval) == 0) { - // 2 rows that do not make a rectangle, return the larger one - const SurfaceInterval row1{boost::icl::first(aligned_interval), - boost::icl::first(rect_interval)}; - const SurfaceInterval row2{boost::icl::first(rect_interval), - boost::icl::last_next(aligned_interval)}; - rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; - } - - if (boost::icl::length(rect_interval) > boost::icl::length(result)) { - result = rect_interval; - } - } - return result; -} - -void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval) { - SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - - ASSERT(src_surface != dst_surface); - - // This is only called when CanCopy is true, no need to run checks here - if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u64 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array<u8, 4> fill_buffer; - - u64 fill_buff_pos = fill_offset; - for (int i : {0, 1, 2, 3}) - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - - FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), - draw_framebuffer.handle); - return; - } - if (src_surface->CanSubRect(subrect_params)) { - BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), - dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), - src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); - return; - } - UNREACHABLE(); +CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { + texture.Create(); + const auto& rect{params.GetRect()}; + AllocateSurfaceTexture(texture.handle, + GetFormatTuple(params.pixel_format, params.component_type), + rect.GetWidth(), rect.GetHeight()); } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { - ASSERT(type != SurfaceType::Fill); +void CachedSurface::LoadGLBuffer() { + ASSERT(params.type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); - if (texture_src_data == nullptr) - return; + u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); - if (gl_buffer == nullptr) { - gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); - gl_buffer.reset(new u8[gl_buffer_size]); - } + ASSERT(texture_src_data); - MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); + gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); - ASSERT(load_start >= addr && load_end <= end); - const u64 start_offset = load_start - addr; + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - if (!is_tiled) { - const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + if (!params.is_tiled) { + const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; - std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, - bytes_per_pixel * width * height); + std::memcpy(gl_buffer.data(), texture_src_data, + bytes_per_pixel * params.width * params.height); } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, - GetActualHeight(), &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + } + + if (IsPixelFormatASTC(params.pixel_format)) { + // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this + ConvertASTCToRGBA8(gl_buffer, params.pixel_format, params.width, params.height); } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { - u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); - if (dst_buffer == nullptr) - return; - - ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); +void CachedSurface::FlushGLBuffer() { + u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); - // TODO: Should probably be done in ::Memory:: and check for other regions too - // same as loadglbuffer() - if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) - flush_end = Memory::VRAM_VADDR_END; - - if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) - flush_start = Memory::VRAM_VADDR; + ASSERT(dst_buffer); + ASSERT(gl_buffer.size() == + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); - ASSERT(flush_start >= addr && flush_end <= end); - const u64 start_offset = flush_start - addr; - const u64 end_offset = flush_end - addr; - - if (type == SurfaceType::Fill) { - const u64 coarse_start_offset = start_offset - (start_offset % fill_size); - const u64 backup_bytes = start_offset % fill_size; - std::array<u8, 4> backup_data; - if (backup_bytes) - std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); - - for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { - std::memcpy(&dst_buffer[offset], &fill_data[0], - std::min(fill_size, end_offset - offset)); - } - - if (backup_bytes) - std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!is_tiled) { - std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + if (!params.is_tiled) { + std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); } else { - gl_to_morton_fns[static_cast<size_t>(pixel_format)]( - stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); + gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, gl_buffer.data(), params.addr); } } MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); -void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureUL); - ASSERT(gl_buffer_size == - GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); + ASSERT(gl_buffer.size() == + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + + const auto& rect{params.GetRect()}; // Load data from memory to the surface GLint x0 = static_cast<GLint>(rect.left); GLint y0 = static_cast<GLint>(rect.bottom); - size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); GLuint target_tex = texture.handle; - - // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in - // surface - OGLTexture unscaled_tex; - if (res_scale != 1) { - x0 = 0; - y0 = 0; - - unscaled_tex.Create(); - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - target_tex = unscaled_tex.handle; - } - OpenGLState cur_state = OpenGLState::GetCurState(); GLuint old_tex = cur_state.texture_units[0].texture_2d; @@ -567,15 +296,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint cur_state.Apply(); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width)); glActiveTexture(GL_TEXTURE0); if (tuple.compressed) { - glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, - static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), - static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, - static_cast<GLsizei>(size), &gl_buffer[buffer_offset]); + glCompressedTexImage2D( + GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), + static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes), + &gl_buffer[buffer_offset]); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, @@ -586,827 +315,238 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); - - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, - scaled_rect, type, read_fb_handle, draw_fb_handle); - } } MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureDL); - if (gl_buffer == nullptr) { - gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); - gl_buffer.reset(new u8[gl_buffer_size]); - } + gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); // Ensure no bad interactions with GL_PACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); - size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - OGLTexture unscaled_tex; - unscaled_tex.Create(); - - MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, - read_fb_handle, draw_fb_handle); - - state.texture_units[0].texture_2d = unscaled_tex.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } else { - state.UnbindTexture(texture.handle); - state.draw.read_framebuffer = read_fb_handle; - state.Apply(); - - if (type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - } else if (type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } else { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - } - glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), - static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), - tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); -} - -enum class MatchFlags { - None = 0, - Invalid = 1, // Flag that can be applied to other match types, invalid matches require - // validation before they can be used - Exact = 1 << 1, // Surfaces perfectly match - SubRect = 1 << 2, // Surface encompasses params - Copy = 1 << 3, // Surface we can copy from - Expand = 1 << 4, // Surface that can expand params - TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters -}; - -constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { - return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); -} + const auto& rect{params.GetRect()}; + size_t buffer_offset = + (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format); -constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { - return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs)); -} + state.UnbindTexture(texture.handle); + state.draw.read_framebuffer = read_fb_handle; + state.Apply(); -/// Get the best surface match (and its match type) for the given flags -template <MatchFlags find_flags> -Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, - ScaleMatch match_scale_type, - boost::optional<SurfaceInterval> validate_interval = boost::none) { - Surface match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (auto& surface : pair.second) { - bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // validity will be checked in GetCopyableInterval - bool is_valid = - (find_flags & MatchFlags::Copy) != MatchFlags::None - ? true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid) - continue; - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if ((find_flags & check_type) == MatchFlags::None) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] { - auto copy_interval = - params.FromInterval(*validate_interval).GetCopyableInterval(surface); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - } + if (params.type == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } else if (params.type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } else { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); } - return match_surface; + glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), + static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), + tuple.format, tuple.type, &gl_buffer[buffer_offset]); + + glPixelStorei(GL_PACK_ROW_LENGTH, 0); } RasterizerCacheOpenGL::RasterizerCacheOpenGL() { read_framebuffer.Create(); draw_framebuffer.Create(); - - attributeless_vao.Create(); - - d24s8_abgr_buffer.Create(); - d24s8_abgr_buffer_size = 0; - - const char* vs_source = R"( -#version 330 core -const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); -void main() { - gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); -} -)"; - const char* fs_source = R"( -#version 330 core - -uniform samplerBuffer tbo; -uniform vec2 tbo_size; -uniform vec4 viewport; - -out vec4 color; - -void main() { - vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; - int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); - color = texelFetch(tbo, tbo_offset).rabg; -} -)"; - d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); - - OpenGLState state = OpenGLState::GetCurState(); - GLuint old_program = state.draw.shader_program; - state.draw.shader_program = d24s8_abgr_shader.handle; - state.Apply(); - - GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); - ASSERT(tbo_u_id != -1); - glUniform1i(tbo_u_id, 0); - - state.draw.shader_program = old_program; - state.Apply(); - - d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); - ASSERT(d24s8_abgr_tbo_size_u_id != -1); - d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); - ASSERT(d24s8_abgr_viewport_u_id != -1); } RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); -} - -bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, - const MathUtil::Rectangle<u32>& src_rect, - const Surface& dst_surface, - const MathUtil::Rectangle<u32>& dst_rect) { - if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) - return false; - - return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, - dst_rect, src_surface->type, read_framebuffer.handle, - draw_framebuffer.handle); -} - -void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, - const MathUtil::Rectangle<u32>& src_rect, - GLuint dst_tex, - const MathUtil::Rectangle<u32>& dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_framebuffer.handle; - state.draw.draw_framebuffer = draw_framebuffer.handle; - state.Apply(); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); - - GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; - if (target_pbo_size > d24s8_abgr_buffer_size) { - d24s8_abgr_buffer_size = target_pbo_size * 2; - glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); - } - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, - 0); - glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom), - static_cast<GLsizei>(src_rect.GetWidth()), - static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - 0); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - // PBO now contains src_tex in RABG format - state.draw.shader_program = d24s8_abgr_shader.handle; - state.draw.vertex_array = attributeless_vao.handle; - state.viewport.x = static_cast<GLint>(dst_rect.left); - state.viewport.y = static_cast<GLint>(dst_rect.bottom); - state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth()); - state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight()); - state.Apply(); - - OGLTexture tbo; - tbo.Create(); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); - - glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()), - static_cast<GLfloat>(src_rect.GetHeight())); - glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x), - static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width), - static_cast<GLfloat>(state.viewport.height)); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - glBindTexture(GL_TEXTURE_BUFFER, 0); -} - -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - // Check for an exact match in existing surfaces - Surface surface = - FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find it - // to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); + while (!surface_cache.empty()) { + UnregisterSurface(surface_cache.begin()->second); } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( - VAddr cpu_addr) const { - // Tries to find the GPU address of a framebuffer based on the CPU address. This is because - // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU - // addresses. We iterate through all cached framebuffers, and compare their starting CPU address - // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps - // surfaces. - - std::vector<Tegra::GPUVAddr> gpu_addresses; - for (const auto& pair : surface_cache) { - for (const auto& surface : pair.second) { - const VAddr surface_cpu_addr = surface->GetCpuAddr(); - if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { - ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); - gpu_addresses.push_back(surface->addr); - } - } - } - - if (gpu_addresses.empty()) { - return {}; - } - - ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); - return gpu_addresses[0]; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{}); - } - - // Attempt to find encompassing surface - Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - ASSERT(surface->res_scale < params.res_scale); - SurfaceParams new_params = *surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params, - match_res_scale); - if (surface != nullptr) { - aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = static_cast<u32>( - new_params.size / aligned_params.BytesInPixels(aligned_params.stride)); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return std::make_tuple(surface, surface->GetScaledSubRect(params)); } Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - auto& gpu = Core::System::GetInstance().GPU(); - - SurfaceParams params; - params.addr = config.tic.Address(); - params.is_tiled = config.tic.IsTiled(); - params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); - - params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); - params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); - - // TODO(Subv): Different types per component are not supported. - ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && - config.tic.r_type.Value() == config.tic.b_type.Value() && - config.tic.r_type.Value() == config.tic.a_type.Value()); - - params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); - - if (config.tic.IsTiled()) { - params.block_height = config.tic.BlockHeight(); - - // TODO(bunnei): The below align up is a hack. This is here because some compressed textures - // are not a multiple of their own compression factor, and so this accounts for that. This - // could potentially result in an extra row of 4px being decoded if a texture is not a - // multiple of 4. - params.width = Common::AlignUp(params.width, 4); - params.height = Common::AlignUp(params.height, 4); - } else { - // Use the texture-provided stride value if the texture isn't tiled. - params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); - } - - params.UpdateParams(); - - return GetSurface(params, ScaleMatch::Ignore, true); + return GetSurface(SurfaceParams::CreateForTexture(config)); } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - const auto& config = regs.rt[0]; // TODO(bunnei): This is hard corded to use just the first render buffer NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); - // update resolution_scale_factor and reset cache if changed - // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We - // need to fix this before making the renderer multi-threaded. - static u16 resolution_scale_factor = GetResolutionScaleFactor(); - if (resolution_scale_factor != GetResolutionScaleFactor()) { - resolution_scale_factor = GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - } - - MathUtil::Rectangle<u32> viewport_clamped{ - static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))), - static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))), - static_cast<u32>(std::clamp(viewport.right, 0, static_cast<s32>(config.width))), - static_cast<u32>(std::clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; - // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; - color_params.width = config.width; - color_params.height = config.height; - // TODO(Subv): Can framebuffers use a different block height? - color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; - SurfaceParams depth_params = color_params; - - color_params.addr = config.Address(); - color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); - color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); - color_params.UpdateParams(); + const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])}; + const SurfaceParams depth_params{color_params}; ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); - // depth_params.addr = config.GetDepthBufferPhysicalAddress(); - // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); - // depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } MathUtil::Rectangle<u32> color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + Surface color_surface; + if (using_color_fb) { + color_surface = GetSurface(color_params); + if (color_surface) { + color_rect = color_surface->GetSurfaceParams().GetRect(); + } + } MathUtil::Rectangle<u32> depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + Surface depth_surface; + if (using_depth_fb) { + depth_surface = GetSurface(depth_params); + if (depth_surface) { + depth_rect = depth_surface->GetSurfaceParams().GetRect(); + } + } MathUtil::Rectangle<u32> fb_rect{}; - if (color_surface != nullptr && depth_surface != nullptr) { + if (color_surface && depth_surface) { fb_rect = color_rect; // Color and Depth surfaces must have the same dimensions and offsets if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); + color_surface = GetSurface(color_params); + depth_surface = GetSurface(depth_params); + fb_rect = color_surface->GetSurfaceParams().GetRect(); } - } else if (color_surface != nullptr) { + } else if (color_surface) { fb_rect = color_rect; - } else if (depth_surface != nullptr) { + } else if (depth_surface) { fb_rect = depth_rect; } - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - } - return std::make_tuple(color_surface, depth_surface, fb_rect); } -Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - UNREACHABLE(); - return {}; +void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { + surface->LoadGLBuffer(); + surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); } -SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { - MathUtil::Rectangle<u32> rect{}; - - Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>( - surface_cache, params, ScaleMatch::Ignore); - - if (match_surface != nullptr) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = - static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size); - match_subrect.stride = - static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size); - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); - } - - rect = match_surface->GetScaledSubRect(match_subrect); +void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { + if (Settings::values.use_accurate_framebuffers) { + // If enabled, always flush dirty surfaces + surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->FlushGLBuffer(); + } else { + // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads + // and flushes are very slow and do not seem to improve accuracy + const auto& params{surface->GetSurfaceParams()}; + Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false); } - - return std::make_tuple(match_surface, rect); } -void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { + if (params.addr == 0 || params.height * params.width == 0) { + return {}; + } - SurfaceRegions regions; - for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; + // Check for an exact match in existing surfaces + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; + Surface surface; + if (search != surface_cache.end()) { + surface = search->second; + if (Settings::values.use_accurate_framebuffers) { + // Reload the surface from Switch memory + LoadSurface(surface); } + } else { + surface = std::make_shared<CachedSurface>(params); + RegisterSurface(surface); + LoadSurface(surface); } - for (auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, - u64 size) { - if (size == 0) - return; - - const SurfaceInterval validate_interval(addr, addr + size); - if (surface->type == SurfaceType::Fill) { - // Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } + return surface; +} - while (true) { - const auto it = surface->invalid_regions.find(validate_interval); - if (it == surface->invalid_regions.end()) - break; - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = *surface; - - Surface copy_surface = - FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - surface->invalid_regions.erase(copy_interval); - continue; +Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { + // Tries to find the GPU address of a framebuffer based on the CPU address. This is because + // final output framebuffers are specified by CPU address, but internally our GPU cache uses + // GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU + // address to the one provided. This is obviously not great, and won't work if the + // framebuffer overlaps surfaces. + + std::vector<Surface> surfaces; + for (const auto& surface : surface_cache) { + const auto& params = surface.second->GetSurfaceParams(); + const VAddr surface_cpu_addr = params.GetCpuAddr(); + if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { + ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); + surfaces.push_back(surface.second); } - - // Load data from Switch memory - FlushRegion(params.addr, params.size); - surface->LoadGLBuffer(params.addr, params.end); - surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); - surface->invalid_regions.erase(params.GetInterval()); } -} -void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { - if (size == 0) - return; + if (surfaces.empty()) { + return {}; + } - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; + ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported"); - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to access - // that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; - auto& surface = pair.second; + return surfaces[0]; +} - if (flush_surface != nullptr && surface != flush_surface) - continue; +void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { + // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should + // probably implement this in the future, but for now, the `use_accurate_framebufers` setting + // can be used to always flush. +} - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); +void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { + for (const auto& pair : surface_cache) { + const auto& surface{pair.second}; + const auto& params{surface->GetSurfaceParams()}; - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); + if (params.IsOverlappingRegion(addr, size)) { + UnregisterSurface(surface); } - surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); - flushed_intervals += interval; } - // Reset dirty regions - dirty_regions -= flushed_intervals; } -void RasterizerCacheOpenGL::FlushAll() { - FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); -} +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; -void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, - const Surface& region_owner) { - if (size == 0) + if (search != surface_cache.end()) { + // Registered already return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner != nullptr) { - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - region_owner->invalid_regions.erase(invalid_interval); } - for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (region_owner == nullptr && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - remove_surfaces.emplace(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->invalid_regions.insert(interval); - - // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures - if (cached_surface->type == SurfaceType::Fill && - cached_surface->IsSurfaceFullyInvalid()) { - remove_surfaces.emplace(cached_surface); - } - } - } - - if (region_owner != nullptr) - dirty_regions.set({invalid_interval, region_owner}); - else - dirty_regions.erase(invalid_interval); - - for (auto& remove_surface : remove_surfaces) { - if (remove_surface == region_owner) { - Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>( - surface_cache, *region_owner, ScaleMatch::Ignore); - ASSERT(expanded_surface); - - if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { - DuplicateSurface(region_owner, expanded_surface); - } else { - continue; - } - } - UnregisterSurface(remove_surface); - } - - remove_surfaces.clear(); + surface_cache[surface_key] = surface; + UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); } -Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { - Surface surface = std::make_shared<CachedSurface>(); - static_cast<SurfaceParams&>(*surface) = params; - - surface->texture.Create(); - - surface->gl_buffer_size = 0; - surface->invalid_regions.insert(surface->GetInterval()); - AllocateSurfaceTexture(surface->texture.handle, - GetFormatTuple(surface->pixel_format, surface->component_type), - surface->GetScaledWidth(), surface->GetScaledHeight()); - - return surface; -} +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; -void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { - if (surface->registered) { + if (search == surface_cache.end()) { + // Unregistered already return; } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - UpdatePagesCachedCount(surface->addr, surface->size, 1); + + UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); + surface_cache.erase(search); } -void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { - if (!surface->registered) { - return; - } - surface->registered = false; - UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +template <typename Map, typename Interval> +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); } void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9da945e19..85e7c8888 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -1,57 +1,26 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once #include <array> +#include <map> #include <memory> -#include <set> -#include <tuple> -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-local-typedefs" -#endif +#include <vector> #include <boost/icl/interval_map.hpp> -#include <boost/icl/interval_set.hpp> -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#include <boost/optional.hpp> -#include <glad/glad.h> -#include "common/assert.h" -#include "common/common_funcs.h" #include "common/common_types.h" +#include "common/hash.h" #include "common/math_util.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" -struct CachedSurface; +class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSet = std::set<Surface>; - -using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>; -using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>; -using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>; - -using SurfaceInterval = SurfaceCache::interval_type; -static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && - std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), - "incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; - using PageMap = boost::icl::interval_map<u64, int>; -enum class ScaleMatch { - Exact, // only accept same res scale - Upscale, // only allow higher scale than params - Ignore // accept every scaled res -}; - struct SurfaceParams { enum class PixelFormat { ABGR8 = 0, @@ -93,10 +62,10 @@ struct SurfaceParams { /** * Gets the compression factor for the specified PixelFormat. This applies to just the * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed texture - * formats. + * compressed image. This is used for maintaining proper surface sizes for compressed + * texture formats. */ - static constexpr u32 GetCompresssionFactor(PixelFormat format) { + static constexpr u32 GetCompressionFactor(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; @@ -112,15 +81,12 @@ struct SurfaceParams { 4, // DXT23 4, // DXT45 4, // DXN1 - 1, // ASTC_2D_4X4 + 4, // ASTC_2D_4X4 }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); return compression_factor_table[static_cast<size_t>(format)]; } - u32 GetCompresssionFactor() const { - return GetCompresssionFactor(pixel_format); - } static constexpr u32 GetFormatBpp(PixelFormat format) { if (format == PixelFormat::Invalid) @@ -165,25 +131,6 @@ struct SurfaceParams { } } - static bool IsFormatASTC(PixelFormat format) { - switch (format) { - case PixelFormat::ASTC_2D_4X4: - return true; - default: - return false; - } - } - - static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { - switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8; - default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) { // TODO(Subv): Properly implement this switch (format) { @@ -276,36 +223,16 @@ struct SurfaceParams { } } - static ComponentType ComponentTypeFromGPUPixelFormat( - Tegra::FramebufferConfig::PixelFormat format) { + static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return ComponentType::UNorm; + return PixelFormat::ABGR8; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } - static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { - SurfaceType a_type = GetFormatType(pixel_format_a); - SurfaceType b_type = GetFormatType(pixel_format_b); - - if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { - return true; - } - - if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { - return true; - } - - if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { - return true; - } - - return false; - } - static SurfaceType GetFormatType(PixelFormat pixel_format) { if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { return SurfaceType::ColorTexture; @@ -317,168 +244,101 @@ struct SurfaceParams { return SurfaceType::Invalid; } - /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" - /// and "pixel_format" - void UpdateParams() { - if (stride == 0) { - stride = width; - } - type = GetFormatType(pixel_format); - size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) - : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); - end = addr + size; - } - - SurfaceInterval GetInterval() const { - return SurfaceInterval::right_open(addr, end); - } - - // Returns the outer rectangle containing "interval" - SurfaceParams FromInterval(SurfaceInterval interval) const; - - SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; - - // Returns the region of the biggest valid rectange within interval - SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - - /** - * Gets the actual width (in pixels) of the surface. This is provided because `width` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `width` is actually the compressed width. - */ - u32 GetActualWidth() const { - return width * GetCompresssionFactor(); - } - - /** - * Gets the actual height (in pixels) of the surface. This is provided because `height` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `height` is actually the compressed height. - */ - u32 GetActualHeight() const { - return height * GetCompresssionFactor(); - } + /// Returns the rectangle corresponding to this surface + MathUtil::Rectangle<u32> GetRect() const; - u32 GetScaledWidth() const { - return width * res_scale; + /// Returns the size of this surface in bytes, adjusted for compression + size_t SizeInBytes() const { + const u32 compression_factor{GetCompressionFactor(pixel_format)}; + ASSERT(width % compression_factor == 0); + ASSERT(height % compression_factor == 0); + return (width / compression_factor) * (height / compression_factor) * + GetFormatBpp(pixel_format) / CHAR_BIT; } - u32 GetScaledHeight() const { - return height * res_scale; - } + /// Returns the CPU virtual address for this surface + VAddr GetCpuAddr() const; - MathUtil::Rectangle<u32> GetRect() const { - return {0, height, width, 0}; + /// Returns true if the specified region overlaps with this surface's region in Switch memory + bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { + return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); } - MathUtil::Rectangle<u32> GetScaledRect() const { - return {0, GetScaledHeight(), GetScaledWidth(), 0}; - } + /// Creates SurfaceParams from a texture configation + static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); + + /// Creates SurfaceParams from a framebuffer configation + static SurfaceParams CreateForFramebuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); + + Tegra::GPUVAddr addr; + bool is_tiled; + u32 block_height; + PixelFormat pixel_format; + ComponentType component_type; + SurfaceType type; + u32 width; + u32 height; + u32 unaligned_height; + size_t size_in_bytes; +}; - u64 PixelsInBytes(u64 size) const { - return size * CHAR_BIT / GetFormatBpp(pixel_format); +/// Hashable variation of SurfaceParams, used for a key in the surface cache +struct SurfaceKey : Common::HashableStruct<SurfaceParams> { + static SurfaceKey Create(const SurfaceParams& params) { + SurfaceKey res; + res.state = params; + return res; } +}; - u64 BytesInPixels(u64 pixels) const { - return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; +namespace std { +template <> +struct hash<SurfaceKey> { + size_t operator()(const SurfaceKey& k) const { + return k.Hash(); } - - VAddr GetCpuAddr() const; - - bool ExactMatch(const SurfaceParams& other_surface) const; - bool CanSubRect(const SurfaceParams& sub_surface) const; - bool CanExpand(const SurfaceParams& expanded_surface) const; - bool CanTexCopy(const SurfaceParams& texcopy_params) const; - - MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; - MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; - - Tegra::GPUVAddr addr = 0; - Tegra::GPUVAddr end = 0; - boost::optional<VAddr> cpu_addr; - u64 size = 0; - - u32 width = 0; - u32 height = 0; - u32 stride = 0; - u32 block_height = 0; - u16 res_scale = 1; - - bool is_tiled = false; - PixelFormat pixel_format = PixelFormat::Invalid; - SurfaceType type = SurfaceType::Invalid; - ComponentType component_type = ComponentType::Invalid; }; +} // namespace std -struct CachedSurface : SurfaceParams { - bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; - bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } +class CachedSurface final { +public: + CachedSurface(const SurfaceParams& params); - bool IsSurfaceFullyInvalid() const { - return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); + const OGLTexture& Texture() const { + return texture; } - bool registered = false; - SurfaceRegions invalid_regions; - - u64 fill_size = 0; /// Number of bytes to read from fill_data - std::array<u8, 4> fill_data; - - OGLTexture texture; - - static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { - if (format == PixelFormat::Invalid) + static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) { + if (format == SurfaceParams::PixelFormat::Invalid) return 0; return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; } - std::unique_ptr<u8[]> gl_buffer; - size_t gl_buffer_size = 0; + const SurfaceParams& GetSurfaceParams() const { + return params; + } // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); - void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); + void LoadGLBuffer(); + void FlushGLBuffer(); // Upload/Download data in gl_buffer in/to this surface's texture - void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); - void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); + void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + +private: + OGLTexture texture; + std::vector<u8> gl_buffer; + SurfaceParams params; }; -class RasterizerCacheOpenGL : NonCopyable { +class RasterizerCacheOpenGL final : NonCopyable { public: RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL(); - /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect, - const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect); - - void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, - GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect); - - /// Copy one surface's region to another - void CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval); - - /// Load a texture from Switch memory to OpenGL and cache it (if not already cached) - Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - - /// Tries to find a framebuffer GPU address based on the provided CPU address - boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const; - - /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from - /// Switch memory to OpenGL and caches it (if not already cached) - SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); @@ -486,29 +346,21 @@ public: SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport); - /// Get a surface that matches the fill config - Surface GetFillSurface(const void* config); + /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory + void MarkSurfaceAsDirty(const Surface& surface); - /// Get a surface that matches a "texture copy" display transfer config - SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); + /// Tries to find a framebuffer GPU address based on the provided CPU address + Surface TryFindFramebufferSurface(VAddr cpu_addr) const; /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); - - /// Mark region as being invalidated by region_owner (nullptr if Switch memory) - void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); + void FlushRegion(Tegra::GPUVAddr addr, size_t size); - /// Flush all cached resources tracked by this cache manager - void FlushAll(); + /// Mark the specified region as being invalidated + void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); private: - void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); - - /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); - - /// Create a new surface - Surface CreateSurface(const SurfaceParams& params); + void LoadSurface(const Surface& surface); + Surface GetSurface(const SurfaceParams& params); /// Register surface into the cache void RegisterSurface(const Surface& surface); @@ -519,18 +371,9 @@ private: /// Increase/decrease the number of surface in pages touching the specified region void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); - SurfaceCache surface_cache; + std::unordered_map<SurfaceKey, Surface> surface_cache; PageMap cached_pages; - SurfaceMap dirty_regions; - SurfaceSet remove_surfaces; OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; - - OGLVertexArray attributeless_vao; - OGLBuffer d24s8_abgr_buffer; - GLsizeiptr d24s8_abgr_buffer_size; - OGLProgram d24s8_abgr_shader; - GLint d24s8_abgr_tbo_size_u_id; - GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index f33766bfd..e3bb2cbb8 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, Memory::FlushMode::Flush); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 2cc6d9a00..21f0d298c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -27,7 +27,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture; - MathUtil::Rectangle<float> display_texcoords; + const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 8316db708..cd7986efa 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -84,6 +84,8 @@ void Config::ReadValues() { qt_config->beginGroup("Renderer"); Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); + Settings::values.use_accurate_framebuffers = + qt_config->value("use_accurate_framebuffers", false).toBool(); Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); @@ -184,6 +186,7 @@ void Config::SaveValues() { qt_config->beginGroup("Renderer"); qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit); + qt_config->setValue("use_accurate_framebuffers", Settings::values.use_accurate_framebuffers); // Cast to double because Qt's written float values are not human-readable qt_config->setValue("bg_red", (double)Settings::values.bg_red); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 47b9b6e95..7664880d5 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -59,11 +59,13 @@ void ConfigureGraphics::setConfiguration() { ui->resolution_factor_combobox->setCurrentIndex( static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit); + ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers); } void ConfigureGraphics::applyConfiguration() { Settings::values.resolution_factor = ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked(); + Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked(); Settings::Apply(); } diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 366931a9a..7d092df03 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -30,6 +30,13 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_accurate_framebuffers"> + <property name="text"> + <string>Use accurate framebuffers (slow)</string> + </property> + </widget> + </item> + <item> <layout class="QHBoxLayout" name="horizontalLayout"> <item> <widget class="QLabel" name="label"> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index ee6e4d658..150915c17 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -98,6 +98,8 @@ void Config::ReadValues() { (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); Settings::values.toggle_framelimit = sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); + Settings::values.use_accurate_framebuffers = + sdl2_config->GetBoolean("Renderer", "use_accurate_framebuffers", false); Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 1c438c3f5..5896971d4 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -102,6 +102,10 @@ resolution_factor = # 0 (default): Off, 1: On use_vsync = +# Whether to use accurate framebuffers +# 0 (default): Off (fast), 1 : On (slow) +use_accurate_framebuffers = + # The clear color for the renderer. What shows up on the sides of the bottom screen. # Must be in range of 0.0-1.0. Defaults to 1.0 for all. bg_red = |