From 9764c13d6d2977903f407761b27d847c0056e1c4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:25:23 -0300 Subject: video_core: Rewrite the texture cache The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues. --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 7 +- src/video_core/renderer_opengl/gl_buffer_cache.h | 8 +- src/video_core/renderer_opengl/gl_device.cpp | 64 +- src/video_core/renderer_opengl/gl_device.h | 13 +- .../renderer_opengl/gl_fence_manager.cpp | 2 +- src/video_core/renderer_opengl/gl_fence_manager.h | 4 +- .../renderer_opengl/gl_framebuffer_cache.cpp | 85 -- .../renderer_opengl/gl_framebuffer_cache.h | 68 - src/video_core/renderer_opengl/gl_rasterizer.cpp | 504 ++++--- src/video_core/renderer_opengl/gl_rasterizer.h | 63 +- .../renderer_opengl/gl_resource_manager.cpp | 2 +- .../renderer_opengl/gl_sampler_cache.cpp | 52 - src/video_core/renderer_opengl/gl_sampler_cache.h | 25 - src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - .../renderer_opengl/gl_shader_decompiler.cpp | 8 +- .../renderer_opengl/gl_shader_decompiler.h | 4 +- .../renderer_opengl/gl_shader_manager.cpp | 15 + src/video_core/renderer_opengl/gl_shader_manager.h | 6 + .../renderer_opengl/gl_state_tracker.cpp | 7 + src/video_core/renderer_opengl/gl_state_tracker.h | 15 +- .../renderer_opengl/gl_stream_buffer.cpp | 32 +- src/video_core/renderer_opengl/gl_stream_buffer.h | 19 +- 
.../renderer_opengl/gl_texture_cache.cpp | 1454 ++++++++++++-------- src/video_core/renderer_opengl/gl_texture_cache.h | 286 ++-- src/video_core/renderer_opengl/maxwell_to_gl.h | 13 + src/video_core/renderer_opengl/renderer_opengl.cpp | 49 +- src/video_core/renderer_opengl/renderer_opengl.h | 1 + src/video_core/renderer_opengl/util_shaders.cpp | 224 +++ src/video_core/renderer_opengl/util_shaders.h | 51 + src/video_core/renderer_opengl/utils.cpp | 42 - src/video_core/renderer_opengl/utils.h | 16 - 31 files changed, 1815 insertions(+), 1325 deletions(-) delete mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.h delete mode 100644 src/video_core/renderer_opengl/gl_sampler_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_sampler_cache.h create mode 100644 src/video_core/renderer_opengl/util_shaders.cpp create mode 100644 src/video_core/renderer_opengl/util_shaders.h delete mode 100644 src/video_core/renderer_opengl/utils.cpp delete mode 100644 src/video_core/renderer_opengl/utils.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 60735d502..5772cad87 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, std::size_t stream_size_) - : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, - std::make_unique(device_, stream_size_, true)}, - device{device_} { + const Device& device_, OGLStreamBuffer& stream_buffer_, + StateTracker& state_tracker) + : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, 
stream_buffer_}, device{device_} { if (!device.HasFastBufferSubData()) { return; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 95251e26b..17ee90316 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -22,6 +22,7 @@ namespace OpenGL { class Device; class OGLStreamBuffer; class RasterizerOpenGL; +class StateTracker; class Buffer : public VideoCommon::BufferBlock { public: @@ -52,9 +53,10 @@ private: using GenericBufferCache = VideoCommon::BufferCache; class OGLBufferCache final : public GenericBufferCache { public: - explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, std::size_t stream_size_); + explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const Device& device, OGLStreamBuffer& stream_buffer, + StateTracker& state_tracker); ~OGLBufferCache(); BufferInfo GetEmptyBuffer(std::size_t) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a94e4f72e..b24179d59 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1; constexpr u32 NumStages = 5; -constexpr std::array LimitUBOs = { +constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, - GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; - -constexpr std::array LimitSSBOs = { + GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, +}; +constexpr std::array 
LIMIT_SSBOS = { GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; - -constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; - -constexpr std::array LimitImages = { + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, +}; +constexpr std::array LIMIT_SAMPLERS = { + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS, + GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, +}; +constexpr std::array LIMIT_IMAGES = { GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; + GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, +}; template T GetInteger(GLenum pname) { @@ -76,8 +80,8 @@ std::vector GetExtensions() { return extensions; } -bool HasExtension(const std::vector& images, std::string_view extension) { - return std::find(images.begin(), images.end(), extension) != images.end(); +bool HasExtension(std::span extensions, std::string_view extension) { + return std::ranges::find(extensions, extension) != extensions.end(); } u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { @@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { std::array BuildMaxUniformBuffers() noexcept { std::array max; - std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), - [](GLenum pname) { return GetInteger(pname); }); + 
std::ranges::transform(LIMIT_UBOS, max.begin(), + [](GLenum pname) { return GetInteger(pname); }); return max; } @@ -115,9 +119,10 @@ std::array BuildBaseBindin for (std::size_t i = 0; i < NumStages; ++i) { const std::size_t stage = stage_swizzle[i]; bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; + Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), + Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), + Extract(base_samplers, num_samplers, total_samplers / NumStages, + LIMIT_SAMPLERS[stage])}; } u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); @@ -130,7 +135,7 @@ std::array BuildBaseBindin // Reserve at least 4 image bindings on the fragment stage. bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); + Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); // This is guaranteed to be at least 1. const u32 total_extracted_images = num_images / (NumStages - 1); @@ -142,7 +147,7 @@ std::array BuildBaseBindin continue; } bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); + Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); } // Compute doesn't care about any of this. @@ -188,6 +193,11 @@ bool IsASTCSupported() { return true; } +[[nodiscard]] bool IsDebugToolAttached(std::span extensions) { + const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); +} + } // Anonymous namespace Device::Device() @@ -206,9 +216,8 @@ Device::Device() "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - - uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); - shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); + uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); @@ -224,6 +233,7 @@ Device::Device() has_precise_bug = TestPreciseBug(); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; + has_debugging_tool_attached = IsDebugToolAttached(extensions); // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8a4b6b9fc..13e66846c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -36,11 +36,11 @@ public: return GetBaseBindings(static_cast(shader_type)); } - std::size_t GetUniformBufferAlignment() const { + size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } - std::size_t GetShaderStorageBufferAlignment() const { + size_t GetShaderStorageBufferAlignment() const { return shader_storage_alignment; } @@ -104,6 +104,10 @@ public: return has_nv_viewport_array2; } + bool HasDebuggingToolAttached() const { + return has_debugging_tool_attached; + } + bool UseAssemblyShaders() const { return use_assembly_shaders; } @@ -118,8 +122,8 @@ private: std::array max_uniform_buffers{}; std::array base_bindings{}; - std::size_t uniform_buffer_alignment{}; - std::size_t shader_storage_alignment{}; + size_t uniform_buffer_alignment{}; 
+ size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; @@ -135,6 +139,7 @@ private: bool has_precise_bug{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; }; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 6040646cb..3e9c922f5 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -46,7 +46,7 @@ void GLInnerFence::Wait() { } FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, + Tegra::GPU& gpu_, TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, QueryCache& query_cache_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 39ca6125b..30dbee613 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -33,12 +33,12 @@ private: using Fence = std::shared_ptr; using GenericFenceManager = - VideoCommon::FenceManager; + VideoCommon::FenceManager; class FenceManagerOpenGL final : public GenericFenceManager { public: explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, + TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, QueryCache& query_cache_); protected: diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp deleted file mode 100644 index b8a512cb6..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ /dev/null @@ -1,85 
+0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" - -namespace OpenGL { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using VideoCore::Surface::SurfaceType; - -FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; - -FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; - -GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { - const auto [entry, is_cache_miss] = cache.try_emplace(key); - auto& framebuffer{entry->second}; - if (is_cache_miss) { - framebuffer = CreateFramebuffer(key); - } - return framebuffer.handle; -} - -OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { - OGLFramebuffer framebuffer; - framebuffer.Create(); - - // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); - - if (key.zeta) { - const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; - const GLenum attach_target = stencil ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; - key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - } - - std::size_t num_buffers = 0; - std::array targets; - - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - if (!key.colors[index]) { - targets[index] = GL_NONE; - continue; - } - const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast(index); - key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - - const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111; - targets[index] = GL_COLOR_ATTACHMENT0 + attachment; - num_buffers = index + 1; - } - - if (num_buffers > 0) { - glDrawBuffers(static_cast(num_buffers), std::data(targets)); - } else { - glDrawBuffer(GL_NONE); - } - - return framebuffer; -} - -std::size_t FramebufferCacheKey::Hash() const noexcept { - std::size_t hash = std::hash{}(zeta); - for (const auto& color : colors) { - hash ^= std::hash{}(color); - } - hash ^= static_cast(color_attachments) << 16; - return hash; -} - -bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(colors, zeta, color_attachments) == - std::tie(rhs.colors, rhs.zeta, rhs.color_attachments); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h deleted file mode 100644 index 8f698fee0..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace OpenGL { - -constexpr std::size_t BitsPerAttachment = 4; - -struct FramebufferCacheKey { - View zeta; - std::array colors; - u32 color_attachments = 0; - - std::size_t Hash() const noexcept; - - bool operator==(const FramebufferCacheKey& rhs) const noexcept; - - bool operator!=(const FramebufferCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - void SetAttachment(std::size_t index, u32 attachment) { - color_attachments |= attachment << (BitsPerAttachment * index); - } -}; - -} // namespace OpenGL - -namespace std { - -template <> -struct hash { - std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace OpenGL { - -class FramebufferCacheOpenGL { -public: - FramebufferCacheOpenGL(); - ~FramebufferCacheOpenGL(); - - GLuint GetFramebuffer(const FramebufferCacheKey& key); - -private: - OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); - - std::unordered_map cache; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e58e84759..8aa63d329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,12 +25,15 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" #include 
"video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { -constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; -constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = +constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; +constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; -constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = +constexpr size_t TOTAL_CONST_BUFFER_BYTES = NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; + +constexpr size_t MAX_TEXTURES = 192; +constexpr size_t MAX_IMAGES = 48; + +struct TextureHandle { + constexpr TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; template -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - ShaderType shader_type, std::size_t index = 0) { +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, + ShaderType shader_type, size_t index = 0) { if constexpr (std::is_same_v) { if (entry.is_separated) { const u32 buffer_1 = entry.buffer; @@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry const u32 offset_2 = entry.secondary_offset; const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return engine.GetTextureInfo(handle_1 | handle_2); + return TextureHandle(handle_1 | handle_2, via_header_index); } } if (entry.is_bindless) { - const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(handle); - } - - const auto& gpu_profile = engine.AccessGuestDriverProfile(); - const u32 offset = entry.offset + static_cast(index * gpu_profile.GetTextureHandlerSize()); - if constexpr (std::is_same_v) { - return engine.GetStageTexture(shader_type, offset); - } else { - return engine.GetTexture(offset); + const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return TextureHandle(raw, via_header_index); } + const u32 buffer = engine.GetBoundBuffer(); + const u64 offset = (entry.offset + index) * sizeof(u32); + return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, @@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, if (!entry.IsIndirect()) { return entry.GetSize(); } - if (buffer.size > Maxwell::MaxConstBufferSize) { LOG_WARNING(Render_OpenGL, "Indirect 
constbuffer size {} exceeds maximum {}", buffer.size, Maxwell::MaxConstBufferSize); @@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss reinterpret_cast(ssbos)); } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { + if (entry.is_buffer) { + return ImageViewType::Buffer; + } + switch (entry.type) { + case Tegra::Shader::TextureType::Texture1D: + return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; + case Tegra::Shader::TextureType::Texture2D: + return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; + case Tegra::Shader::TextureType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::TextureType::TextureCube: + return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { + switch (entry.type) { + case Tegra::Shader::ImageType::Texture1D: + return ImageViewType::e1D; + case Tegra::Shader::ImageType::Texture1DArray: + return ImageViewType::e1DArray; + case Tegra::Shader::ImageType::Texture2D: + return ImageViewType::e2D; + case Tegra::Shader::ImageType::Texture2DArray: + return ImageViewType::e2DArray; + case Tegra::Shader::ImageType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::ImageType::TextureBuffer: + return ImageViewType::Buffer; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Core::Memory::Memory& cpu_memory_, const Device& device_, ScreenInfo& screen_info_, ProgramManager& program_manager_, StateTracker& state_tracker_) - : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), + : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), + stream_buffer(device, state_tracker), + texture_cache_runtime(device, program_manager, state_tracker), + texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), - buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE), + buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), async_shaders(emu_window_) { - CheckExtensions(); - unified_uniform_buffer.Create(); glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); @@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra nullptr, 0); } } - if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); } @@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::CheckExtensions() { - if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { - LOG_WARNING( - Render_OpenGL, - "Anisotropic filter is not supported! 
This can cause graphical issues in some games."); - } -} - void RasterizerOpenGL::SetupVertexFormat() { auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexFormats]) { @@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { return info.offset; } -void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { +void RasterizerOpenGL::SetupShaders() { MICROPROFILE_SCOPE(OpenGL_Shader); u32 clip_distances = 0; + std::array shaders{}; + image_view_indices.clear(); + sampler_handles.clear(); + + texture_cache.SynchronizeGraphicsDescriptors(); + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = maxwell3d.regs.shader_config[index]; const auto program{static_cast(index)}; @@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } continue; } - // Currently this stages are not supported in the OpenGL backend. // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL if (program == Maxwell::ShaderProgram::TesselationControl || @@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { default: UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, shader_config.enable.Value(), shader_config.offset); + break; } // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; + const size_t stage = index == 0 ? 0 : index - 1; + shaders[stage] = shader; + SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); + SetupDrawTextures(shader, stage); + SetupDrawImages(shader, stage); // Workaround for Intel drivers. 
// When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { ++index; } } - SyncClipEnabled(clip_distances); maxwell3d.dirty.flags[Dirty::Shaders] = false; + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader* const shader = shaders[stage]; + if (shader) { + const auto base = device.GetBaseBindings(stage); + BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, + texture_index, image_index); + } + } } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s shader_cache.LoadDiskCache(title_id, stop_loading, callback); } -void RasterizerOpenGL::ConfigureFramebuffers() { - MICROPROFILE_SCOPE(OpenGL_Framebuffer); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { - return; - } - maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; - - texture_cache.GuardRenderTargets(true); - - View depth_surface = texture_cache.GetDepthBufferSurface(true); - - const auto& regs = maxwell3d.regs; - UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); - - // Bind the framebuffer surfaces - FramebufferCacheKey key; - const auto colors_count = static_cast(regs.rt_control.count); - for (std::size_t index = 0; index < colors_count; ++index) { - View color_surface{texture_cache.GetColorBufferSurface(index, true)}; - if (!color_surface) { - continue; - } - // Assume that a surface will be written to if it is used as a framebuffer, even - // if the shader doesn't actually write to it. 
- texture_cache.MarkColorBufferInUse(index); - - key.SetAttachment(index, regs.rt_control.GetMap(index)); - key.colors[index] = std::move(color_surface); - } - - if (depth_surface) { - // Assume that a surface will be written to if it is used as a framebuffer, even if - // the shader doesn't actually write to it. - texture_cache.MarkDepthBufferInUse(); - key.zeta = std::move(depth_surface); - } - - texture_cache.GuardRenderTargets(false); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - -void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { - const auto& regs = maxwell3d.regs; - - texture_cache.GuardRenderTargets(true); - View color_surface; - - if (using_color) { - // Determine if we have to preserve the contents. - // First we have to make sure all clear masks are enabled. - bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || - !regs.clear_buffers.B || !regs.clear_buffers.A; - const std::size_t index = regs.clear_buffers.RT; - if (regs.clear_flags.scissor) { - // Then we have to confirm scissor testing clears the whole image. - const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.rt[index].width; - preserve_contents |= scissor.max_y < regs.rt[index].height; - } - - color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); - texture_cache.MarkColorBufferInUse(index); - } - - View depth_surface; - if (using_depth_stencil) { - bool preserve_contents = false; - if (regs.clear_flags.scissor) { - // For depth stencil clears we only have to confirm scissor test covers the whole image. 
- const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.zeta_width; - preserve_contents |= scissor.max_y < regs.zeta_height; - } - - depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); - texture_cache.MarkDepthBufferInUse(); - } - texture_cache.GuardRenderTargets(false); - - FramebufferCacheKey key; - key.colors[0] = std::move(color_surface); - key.zeta = std::move(depth_surface); - - state_tracker.NotifyFramebuffer(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - void RasterizerOpenGL::Clear() { if (!maxwell3d.ShouldExecute()) { return; @@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() { regs.clear_buffers.A) { use_color = true; - state_tracker.NotifyColorMask0(); - glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, + const GLuint index = regs.clear_buffers.RT; + state_tracker.NotifyColorMask(index); + glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); // TODO(Rodrigo): Determine if clamping is used on clears @@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() { state_tracker.NotifyScissor0(); glDisablei(GL_SCISSOR_TEST, 0); } - UNIMPLEMENTED_IF(regs.clear_flags.viewport); - ConfigureClearFramebuffer(use_color, use_depth || use_stencil); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + } if (use_color) { - glClearBufferfv(GL_COLOR, 0, regs.clear_color); + glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } - if (use_depth && use_stencil) { glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); } else if (use_depth) { @@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { (Maxwell::MaxConstBufferSize + 
device.GetUniformBufferAlignment()); // Prepare the vertex array. - const bool invalidated = buffer_cache.Map(buffer_size); - - if (invalidated) { - // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::VertexBuffers] = true; - for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { - dirty[index] = true; - } - } + buffer_cache.Map(buffer_size); // Prepare vertex array format. SetupVertexFormat(); @@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } // Setup shaders and their used resources. - texture_cache.GuardSamplers(true); - const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - SetupShaders(primitive_mode); - texture_cache.GuardSamplers(false); - - ConfigureFramebuffers(); + auto lock = texture_cache.AcquireLock(); + SetupShaders(); // Signal the buffer cache that we are not going to upload more things. 
buffer_cache.Unmap(); - + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); program_manager.BindGraphicsPipeline(); - if (texture_cache.TextureBarrier()) { - glTextureBarrier(); - } - + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); @@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Acquire(); current_cbuf = 0; - auto kernel = shader_cache.GetComputeKernel(code_addr); - program_manager.BindCompute(kernel->GetHandle()); + Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - SetupComputeTextures(kernel); - SetupComputeImages(kernel); + auto lock = texture_cache.AcquireLock(); + BindComputeTextures(kernel); - const std::size_t buffer_size = - Tegra::Engines::KeplerCompute::NumConstBuffers * - (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); + const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); SetupComputeConstBuffers(kernel); @@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = kepler_compute.launch_description; - program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.FlushRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.DownloadMemory(addr, size); + } buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } @@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr 
addr, u64 size) { if (!Settings::IsGPULevelHigh()) { return buffer_cache.MustFlushRegion(addr, size); } - return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); + return texture_cache.IsRegionGpuModified(addr, size) || + buffer_cache.MustFlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { @@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.InvalidateRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); @@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.OnCPUWrite(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.OnCPUWrite(addr, size); buffer_cache.OnCPUWrite(addr, size); } void RasterizerOpenGL::SyncGuestHost() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - texture_cache.SyncGuestHost(); buffer_cache.SyncGuestHost(); shader_cache.SyncGuestHost(); } +void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UnmapMemory(addr, size); + } + buffer_cache.OnCPUWrite(addr, size); + shader_cache.OnCPUWrite(addr, size); +} + void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write(addr, value); @@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() { GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); } +void RasterizerOpenGL::FragmentBarrier() { + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); +} + +void RasterizerOpenGL::TiledCacheBarrier() { + glTextureBarrier(); +} + void RasterizerOpenGL::FlushCommands() { // Only flush when we have commands queued 
to OpenGL. if (num_queued_commands == 0) { @@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() { // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. num_queued_commands = 0; + fence_manager.TickFrame(); buffer_cache.TickFrame(); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.TickFrame(); + } } -bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); - texture_cache.DoFermiCopy(src, dst, copy_config); + auto lock = texture_cache.AcquireLock(); + texture_cache.BlitImage(dst, src, copy_config); return true; } bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { - if (!framebuffer_addr) { - return {}; + if (framebuffer_addr == 0) { + return false; } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; - if (!surface) { - return {}; + auto lock = texture_cache.AcquireLock(); + ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; + if (!image_view) { + return false; } - // Verify that the cached surface is the same size and format as the requested framebuffer - const auto& params{surface->GetSurfaceParams()}; - const auto& pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); + // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer 
height is different"); - if (params.pixel_format != pixel_format) { - LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); - } + screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); + return true; +} - screen_info.display_texture = surface->GetTexture(); - screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; +void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { + image_view_indices.clear(); + sampler_handles.clear(); - return true; + texture_cache.SynchronizeComputeDescriptors(); + + SetupComputeTextures(kernel); + SetupComputeImages(kernel); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + program_manager.BindCompute(kernel->GetHandle()); + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); +} + +void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, + GLuint base_image, size_t& image_view_index, + size_t& texture_index, size_t& image_index) { + const GLuint* const samplers = sampler_handles.data() + texture_index; + const GLuint* const textures = texture_handles.data() + texture_index; + const GLuint* const images = image_handles.data() + image_index; + + const size_t num_samplers = entries.samplers.size(); + for (const auto& sampler : entries.samplers) { + for (size_t i = 0; i < sampler.size; ++i) { + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); + texture_handles[texture_index++] = handle; + } + } + const size_t num_images = entries.images.size(); + for (size_t unit = 0; unit < 
num_images; ++unit) { + // TODO: Mark as modified + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); + image_handles[image_index] = handle; + ++image_index; + } + if (num_samplers > 0) { + glBindSamplers(base_texture, static_cast(num_samplers), samplers); + glBindTextures(base_texture, static_cast(num_samplers), textures); + } + if (num_images > 0) { + glBindImageTextures(base_image, static_cast(num_images), images); + } } void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { @@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; - const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; const auto& entries{shader->GetEntries().global_memory_entries}; @@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e } } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = device.GetBaseBindings(stage_index).sampler; +void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { + const bool via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : shader->GetEntries().samplers) { const auto shader_type = static_cast(stage_index); - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); - SetupTexture(binding++, texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const auto handle = + GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); + const Sampler* const 
sampler = texture_cache.GetGraphicsSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = 0; +void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : kernel->GetEntries().samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); - SetupTexture(binding++, texture, entry); + for (size_t i = 0; i < entry.size; ++i) { + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); + const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry) { - const auto view = texture_cache.GetTextureSurface(texture.tic, entry); - if (!view) { - // Can occur when texture addr is null or its memory is unmapped/invalid - glBindSampler(binding, 0); - glBindTextureUnit(binding, 0); - return; - } - const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); - glBindTextureUnit(binding, handle); - if (!view->GetSurfaceParams().IsBuffer()) { - glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); - } -} - -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { - u32 binding = device.GetBaseBindings(stage_index).image; +void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { + const bool via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& 
entry : shader->GetEntries().images) { const auto shader_type = static_cast(stage_index); - const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(binding++, tic, entry); + const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupComputeImages(Shader* shader) { - u32 binding = 0; +void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : shader->GetEntries().images) { - const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; - SetupImage(binding++, tic, entry); + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, - const ImageEntry& entry) { - const auto view = texture_cache.GetImageSurface(tic, entry); - if (!view) { - glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); - return; - } - if (entry.is_written) { - view->MarkAsModified(texture_cache.Tick()); - } - const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); -} - void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; @@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() { flags[Dirty::PointSize] = false; oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); + oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - if (maxwell3d.regs.vp_point_size.enable) { - // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. 
- glEnable(GL_PROGRAM_POINT_SIZE); - return; - } - - // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid - // in OpenGL). glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); - glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index de28cff15..82e03e677 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,12 +7,13 @@ #include #include #include -#include #include #include #include #include +#include + #include #include "common/common_types.h" @@ -23,16 +24,14 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" @@ -51,7 +50,7 @@ class MemoryManager; namespace OpenGL { struct ScreenInfo; -struct DrawParameters; +struct ShaderEntries; struct BindlessSSBO { GLuint64EXT address; @@ -79,15 +78,18 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void UnmapMemory(VAddr addr, u64 size) override; void 
SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; + void FragmentBarrier() override; + void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; - bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; @@ -108,11 +110,14 @@ public: } private: - /// Configures the color and depth framebuffer states. - void ConfigureFramebuffers(); + static constexpr size_t MAX_TEXTURES = 192; + static constexpr size_t MAX_IMAGES = 48; + static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + + void BindComputeTextures(Shader* kernel); - /// Configures the color and depth framebuffer for clearing. - void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); + void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, + size_t& image_view_index, size_t& texture_index, size_t& image_index); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); @@ -136,23 +141,16 @@ private: size_t size, BindlessSSBO* ssbo); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(std::size_t stage_index, Shader* shader); + void SetupDrawTextures(const Shader* shader, size_t stage_index); /// Configures the textures used in a compute shader. - void SetupComputeTextures(Shader* kernel); - - /// Configures a texture. 
- void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry); + void SetupComputeTextures(const Shader* kernel); /// Configures images in a graphics shader. - void SetupDrawImages(std::size_t stage_index, Shader* shader); + void SetupDrawImages(const Shader* shader, size_t stage_index); /// Configures images in a compute shader. - void SetupComputeImages(Shader* shader); - - /// Configures an image. - void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); + void SetupComputeImages(const Shader* shader); /// Syncs the viewport and depth range to match the guest state void SyncViewport(); @@ -227,9 +225,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - /// Check for extension that are not strictly required but are needed for correct emulation - void CheckExtensions(); - std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; @@ -242,7 +237,7 @@ private: GLintptr SetupIndexBuffer(); - void SetupShaders(GLenum primitive_mode); + void SetupShaders(); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -254,19 +249,21 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - TextureCacheOpenGL texture_cache; + OGLStreamBuffer stream_buffer; + TextureCacheRuntime texture_cache_runtime; + TextureCache texture_cache; ShaderCacheOpenGL shader_cache; - SamplerCacheOpenGL sampler_cache; - FramebufferCacheOpenGL framebuffer_cache; QueryCache query_cache; OGLBufferCache buffer_cache; FenceManagerOpenGL fence_manager; VideoCommon::Shader::AsyncShaders async_shaders; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - - GLint vertex_binding = 0; + boost::container::static_vector image_view_indices; + std::array image_view_ids; + boost::container::static_vector sampler_handles; + std::array texture_handles; + std::array image_handles; std::array transform_feedback_buffers; @@ -280,7 +277,7 @@ 
private: std::size_t current_cbuf = 0; OGLBuffer unified_uniform_buffer; - /// Number of commands queued to the OpenGL driver. Reseted on flush. + /// Number of commands queued to the OpenGL driver. Resetted on flush. std::size_t num_queued_commands = 0; u32 last_clip_distance_mask = 0; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0ebcec427..0e34a0f20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -71,7 +71,7 @@ void OGLSampler::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenSamplers(1, &handle); + glCreateSamplers(1, &handle); } void OGLSampler::Release() { diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp deleted file mode 100644 index 5c174879a..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/logging/log.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - -namespace OpenGL { - -SamplerCacheOpenGL::SamplerCacheOpenGL() = default; - -SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; - -OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - OGLSampler sampler; - sampler.Create(); - - const GLuint sampler_id{sampler.handle}; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - tsc.depth_compare_enabled == 1 ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); - } else { - LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); - } - - return sampler; -} - -GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { - return sampler.handle; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h deleted file mode 100644 index 34ee37f00..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/sampler_cache.h" - -namespace OpenGL { - -class SamplerCacheOpenGL final : public VideoCommon::SamplerCache { -public: - explicit SamplerCacheOpenGL(); - ~SamplerCacheOpenGL(); - -protected: - OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - - GLuint ToSamplerType(const OGLSampler& sampler) const override; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eabfdea5d..d4841fdb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -27,7 +27,6 @@ #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ccbdfe967..2e1fa252d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::TextureType; -using VideoCommon::Shader::BuildTransformFeedback; -using VideoCommon::Shader::Registry; -using namespace std::string_literals; using namespace VideoCommon::Shader; +using namespace std::string_literals; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Operation = const OperationNode&; @@ -2753,11 +2751,11 @@ private: } } - std::string GetSampler(const Sampler& sampler) const { + std::string GetSampler(const SamplerEntry& sampler) const 
{ return AppendSuffix(sampler.index, "sampler"); } - std::string GetImage(const Image& image) const { + std::string GetImage(const ImageEntry& image) const { return AppendSuffix(image.index, "image"); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c4ff47875..be68994bb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,8 +20,8 @@ namespace OpenGL { class Device; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::Sampler; -using ImageEntry = VideoCommon::Shader::Image; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 691c6c79b..553e6e8d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() { } } +void ProgramManager::BindHostCompute(GLuint program) { + if (use_assembly_programs) { + glDisable(GL_COMPUTE_PROGRAM_NV); + } + glUseProgram(program); + is_graphics_bound = false; +} + +void ProgramManager::RestoreGuestCompute() { + if (use_assembly_programs) { + glEnable(GL_COMPUTE_PROGRAM_NV); + glUseProgram(0); + } +} + void ProgramManager::UseVertexShader(GLuint program) { if (use_assembly_programs) { BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 950e0dfcb..ad42cce74 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,6 +45,12 @@ public: /// 
Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); + /// Binds an OpenGL GLSL program object unsynchronized with the guest state. + void BindHostCompute(GLuint program); + + /// Rewinds BindHostCompute state changes. + void RestoreGuestCompute(); + void UseVertexShader(GLuint program); void UseGeometryShader(GLuint program); void UseFragmentShader(GLuint program); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 45f4fc565..60e6fa39f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} } } +void StateTracker::InvalidateStreamBuffer() { + flags[Dirty::VertexBuffers] = true; + for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { + flags[index] = true; + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 9d127548f..574615d3c 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -92,6 +92,8 @@ class StateTracker { public: explicit StateTracker(Tegra::GPU& gpu); + void InvalidateStreamBuffer(); + void BindIndexBuffer(GLuint new_index_buffer) { if (index_buffer == new_index_buffer) { return; @@ -100,6 +102,14 @@ public: glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); } + void BindFramebuffer(GLuint new_framebuffer) { + if (framebuffer == new_framebuffer) { + return; + } + framebuffer = new_framebuffer; + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + } + void NotifyScreenDrawVertexArray() { flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true; @@ -129,9 +139,9 @@ public: flags[OpenGL::Dirty::Scissor0] = true; } - void NotifyColorMask0() { + void 
NotifyColorMask(size_t index) { flags[OpenGL::Dirty::ColorMasks] = true; - flags[OpenGL::Dirty::ColorMask0] = true; + flags[OpenGL::Dirty::ColorMask0 + index] = true; } void NotifyBlend0() { @@ -190,6 +200,7 @@ public: private: Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; + GLuint framebuffer = 0; GLuint index_buffer = 0; }; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 887995cf4..e0819cdf2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) - : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) + : state_tracker{state_tracker_} { gl_buffer.Create(); - GLsizeiptr allocate_size = size; - if (vertex_data_usage) { - // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer - // read position is near the end and is an out-of-bound access to the vertex buffer. This is - // probably a bug in the driver and is related to the usage of vec3 attributes in the - // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the - // crash. 
- allocate_size *= 2; - } - static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); mapped_ptr = static_cast( - glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); @@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() { gl_buffer.Release(); } -std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { - ASSERT(size <= buffer_size); - ASSERT(alignment <= buffer_size); +std::pair OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { + ASSERT(size <= BUFFER_SIZE); + ASSERT(alignment <= BUFFER_SIZE); mapped_size = size; if (alignment > 0) { buffer_pos = Common::AlignUp(buffer_pos, alignment); } - bool invalidate = false; - if (buffer_pos + size > buffer_size) { + if (buffer_pos + size > BUFFER_SIZE) { MICROPROFILE_SCOPE(OpenGL_StreamBuffer); glInvalidateBufferData(gl_buffer.handle); + state_tracker.InvalidateStreamBuffer(); buffer_pos = 0; - invalidate = true; } - return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); + return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 307a67113..dd9cf67eb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -4,29 +4,31 @@ #pragma once -#include +#include + #include + #include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { class Device; +class StateTracker; class 
OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); + explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); ~OGLStreamBuffer(); /* * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes * and the optional alignment requirement. * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. - * The return values are the pointer to the new chunk, the offset within the buffer, - * and the invalidation flag for previous chunks. + * The return values are the pointer to the new chunk, and the offset within the buffer. * The actual used size must be specified on unmapping the chunk. */ - std::tuple Map(GLsizeiptr size, GLintptr alignment = 0); + std::pair Map(GLsizeiptr size, GLintptr alignment = 0); void Unmap(GLsizeiptr size); @@ -39,15 +41,18 @@ public: } GLsizeiptr Size() const noexcept { - return buffer_size; + return BUFFER_SIZE; } private: + static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; + + StateTracker& state_tracker; + OGLBuffer gl_buffer; GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; - GLsizeiptr buffer_size = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index daf352b50..4c690418c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -2,173 +2,238 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include "common/assert.h" -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "core/core.h" -#include "video_core/morton.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include +#include +#include +#include + +#include + +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache/surface_base.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache.h" -#include "video_core/textures/convert.h" -#include "video_core/textures/texture.h" +#include "video_core/textures/decoders.h" namespace OpenGL { -using Tegra::Texture::SwizzleSource; -using VideoCore::MortonSwizzleMode; +namespace { +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureMipmapFilter; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCommon::CalculateLevelStrideAlignment; +using VideoCommon::ImageCopy; +using VideoCommon::ImageFlagBits; +using VideoCommon::ImageType; +using VideoCommon::NUM_RT; +using VideoCommon::SamplesLog2; +using VideoCommon::SwizzleParameters; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsPixelFormatSRGB; +using VideoCore::Surface::MaxPixelFormat; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; -MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", 
MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", - MP_RGB(128, 192, 128)); +struct CopyOrigin { + GLint level; + GLint x; + GLint y; + GLint z; +}; -namespace { +struct CopyRegion { + GLsizei width; + GLsizei height; + GLsizei depth; +}; struct FormatTuple { GLenum internal_format; GLenum format = GL_NONE; GLenum type = GL_NONE; + GLenum store_format = internal_format; }; -constexpr std::array tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // 
BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // 
ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - - // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - - // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, 
GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + 
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + 
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; +constexpr std::array ACCELERATED_FORMATS{ + GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, + GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, + GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, 
GL_R8UI, GL_RGBA32I, + GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I, + GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16, + GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM, + GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, +}; + const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); - return tex_format_tuples[static_cast(pixel_format)]; + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; } -GLenum GetTextureTarget(const SurfaceTarget& target) { - switch (target) { - case SurfaceTarget::TextureBuffer: +GLenum ImageTarget(const VideoCommon::ImageInfo& info) { + switch (info.type) { + case ImageType::e1D: + return GL_TEXTURE_1D_ARRAY; + case ImageType::e2D: + if (info.num_samples > 1) { + return GL_TEXTURE_2D_MULTISAMPLE_ARRAY; + } + return GL_TEXTURE_2D_ARRAY; + case ImageType::e3D: + return GL_TEXTURE_3D; + case ImageType::Linear: + return GL_TEXTURE_2D_ARRAY; + case ImageType::Buffer: return GL_TEXTURE_BUFFER; - case SurfaceTarget::Texture1D: + } + UNREACHABLE_MSG("Invalid image type={}", info.type); + return GL_NONE; +} + +GLenum ImageTarget(ImageViewType type, int num_samples = 1) { + const bool is_multisampled = num_samples > 1; + switch (type) { + case ImageViewType::e1D: return GL_TEXTURE_1D; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D; - case SurfaceTarget::Texture3D: + case ImageViewType::e2D: + return is_multisampled ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; + case ImageViewType::Cube: + return GL_TEXTURE_CUBE_MAP; + case ImageViewType::e3D: return GL_TEXTURE_3D; - case SurfaceTarget::Texture1DArray: + case ImageViewType::e1DArray: return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP; - case SurfaceTarget::TextureCubeArray: + case ImageViewType::e2DArray: + return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + case ImageViewType::CubeArray: return GL_TEXTURE_CUBE_MAP_ARRAY; + case ImageViewType::Rect: + return GL_TEXTURE_RECTANGLE; + case ImageViewType::Buffer: + return GL_TEXTURE_BUFFER; } - UNREACHABLE(); - return {}; + UNREACHABLE_MSG("Invalid image view type={}", type); + return GL_NONE; } -GLint GetSwizzleSource(SwizzleSource source) { +GLenum TextureMode(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8_UINT_D24_UNORM: + return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + +GLint Swizzle(SwizzleSource source) { switch (source) { case SwizzleSource::Zero: return GL_ZERO; @@ -184,530 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) { case SwizzleSource::OneFloat: return GL_ONE; } - UNREACHABLE(); + UNREACHABLE_MSG("Invalid swizzle source={}", source); return GL_NONE; } -GLenum GetComponent(PixelFormat format, bool is_first) { - switch (format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; - case PixelFormat::S8_UINT_D24_UNORM: - return is_first ? 
GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; +GLenum AttachmentType(PixelFormat format) { + switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { + case SurfaceType::Depth: + return GL_DEPTH_ATTACHMENT; + case SurfaceType::DepthStencil: + return GL_DEPTH_STENCIL_ATTACHMENT; default: - UNREACHABLE(); - return GL_DEPTH_COMPONENT; + UNIMPLEMENTED_MSG("Unimplemented type={}", type); + return GL_NONE; } } -void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { - if (params.IsBuffer()) { - return; +[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) { + if (!device.HasASTC() && IsPixelFormatASTC(format)) { + return true; } - glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast(params.num_levels - 1)); - if (params.num_levels == 1) { - glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); + switch (format) { + case PixelFormat::BC4_UNORM: + case PixelFormat::BC5_UNORM: + return type == ImageType::e3D; + default: + break; } + return false; } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, - OGLBuffer& texture_buffer) { - OGLTexture texture; - texture.Create(target); +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { + switch (value) { + case SwizzleSource::G: + return SwizzleSource::R; + default: + return value; + } +} - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); - break; - case SurfaceTarget::TextureBuffer: - texture_buffer.Create(); - glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), - nullptr, 
GL_DYNAMIC_STORAGE_BIT); - glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); +void ApplySwizzle(GLuint handle, PixelFormat format, std::array swizzle) { + switch (format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: + UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + TextureMode(format, swizzle[0] == SwizzleSource::R)); + std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); break; - case SurfaceTarget::Texture2D: - case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height); + default: break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height, params.depth); + } + std::array gl_swizzle; + std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle); + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); +} + +[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, + const VideoCommon::ImageInfo& info) { + // Disable accelerated uploads for now as they don't implement swizzled uploads + return false; + switch (info.type) { + case ImageType::e2D: + case ImageType::e3D: + case ImageType::Linear: break; default: - UNREACHABLE(); + return false; + } + const GLenum internal_format = GetFormatTuple(info.format).internal_format; + const auto& format_info = runtime.FormatInfo(info.type, internal_format); + if (format_info.is_compressed) { + return false; + } + if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { + return false; } + if (format_info.compatibility_by_size) { + return true; + } + const GLenum store_format = 
StoreFormat(BytesPerBlock(info.format)); + const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class; + return format_info.compatibility_class == store_class; +} - ApplyTextureDefaults(params, texture.handle); +[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, + VideoCommon::SubresourceLayers subresource, GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyOrigin{ + .level = static_cast(subresource.base_level), + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .z = static_cast(subresource.base_layer), + }; + case GL_TEXTURE_3D: + return CopyOrigin{ + .level = static_cast(subresource.base_level), + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .z = static_cast(offset.z), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0}; + } +} - return texture; +[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent, + VideoCommon::SubresourceLayers dst_subresource, + GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyRegion{ + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .depth = static_cast(dst_subresource.num_layers), + }; + case GL_TEXTURE_3D: + return CopyRegion{ + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .depth = static_cast(extent.depth), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyRegion{.width = 0, .height = 0, .depth = 0}; + } } -constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | - (static_cast(z_source) << 8) | static_cast(w_source); +void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* 
image_view) { + if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { + const GLuint texture = image_view->DefaultHandle(); + glNamedFramebufferTexture(fbo, attachment, texture, 0); + return; + } + const GLuint texture = image_view->Handle(ImageViewType::e3D); + if (image_view->range.extent.layers > 1) { + // TODO: OpenGL doesn't support rendering to a fixed number of slices + glNamedFramebufferTexture(fbo, attachment, texture, 0); + } else { + const u32 slice = image_view->range.base.layer; + glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice); + } } } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_) - : SurfaceBase{gpu_addr_, params_, is_astc_supported_} { - if (is_converted) { - internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; - format = GL_RGBA; - type = GL_UNSIGNED_BYTE; - } else { - const auto& tuple{GetFormatTuple(params.pixel_format)}; - internal_format = tuple.internal_format; - format = tuple.format; - type = tuple.type; - is_compressed = params.IsCompressed(); - } - target = GetTextureTarget(params.target); - texture = CreateTexture(params, target, internal_format, texture_buffer); - DecorateSurfaceName(); +ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) + : span(map, size), sync{sync_}, handle{handle_} {} - u32 num_layers = 1; - if (params.is_layered || params.target == SurfaceTarget::Texture3D) { - num_layers = params.depth; +ImageBufferMap::~ImageBufferMap() { + if (sync) { + sync->Create(); } - - main_view = - CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); } -CachedSurface::~CachedSurface() = default; +TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, + StateTracker& state_tracker_) + : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { + static constexpr 
std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; + for (size_t i = 0; i < TARGETS.size(); ++i) { + const GLenum target = TARGETS[i]; + for (const FormatTuple& tuple : FORMAT_TABLE) { + const GLenum format = tuple.internal_format; + GLint compat_class; + GLint compat_type; + GLint is_compressed; + glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class); + glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1, + &compat_type); + glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed); + const FormatProperties properties{ + .compatibility_class = static_cast(compat_class), + .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE, + .is_compressed = is_compressed == GL_TRUE, + }; + format_properties[i].emplace(format, properties); + } + } + null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); + null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); + null_image_3d.Create(GL_TEXTURE_3D); + null_image_rect.Create(GL_TEXTURE_RECTANGLE); + glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); + glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); + glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); + glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); + + std::array new_handles; + glGenTextures(static_cast(new_handles.size()), new_handles.data()); + null_image_view_1d.handle = new_handles[0]; + null_image_view_2d.handle = new_handles[1]; + null_image_view_2d_array.handle = new_handles[2]; + null_image_view_cube.handle = new_handles[3]; + glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1, + 0, 1); + glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0, + 1, 0, 1); + glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY, + null_image_cube_array.handle, GL_R8, 0, 1, 0, 1); + 
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, + GL_R8, 0, 1, 0, 6); + const std::array texture_handles{ + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, + null_image_view_2d_array.handle, null_image_view_cube.handle, + }; + for (const GLuint handle : texture_handles) { + static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); + } + const auto set_view = [this](ImageViewType type, GLuint handle) { + if (device.HasDebuggingToolAttached()) { + const std::string name = fmt::format("NullImage {}", type); + glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + } + null_image_views[static_cast(type)] = handle; + }; + set_view(ImageViewType::e1D, null_image_view_1d.handle); + set_view(ImageViewType::e2D, null_image_view_2d.handle); + set_view(ImageViewType::Cube, null_image_view_cube.handle); + set_view(ImageViewType::e3D, null_image_3d.handle); + set_view(ImageViewType::e1DArray, null_image_1d_array.handle); + set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); + set_view(ImageViewType::CubeArray, null_image_cube_array.handle); + set_view(ImageViewType::Rect, null_image_rect.handle); +} -void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Download); +TextureCacheRuntime::~TextureCacheRuntime() = default; - if (params.IsBuffer()) { - glGetNamedBufferSubData(texture_buffer.handle, 0, - static_cast(params.GetHostSizeInBytes(false)), - staging_buffer.data()); - return; - } +void TextureCacheRuntime::Finish() { + glFinish(); +} - SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} - for (u32 level = 0; level < 
params.emulated_levels; ++level) { - glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); +ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} - u8* const mip_data = staging_buffer.data() + mip_offset; - const GLsizei size = static_cast(params.GetHostMipmapSize(level)); - if (is_compressed) { - glGetCompressedTextureImage(texture.handle, level, size, mip_data); - } else { - glGetTextureImage(texture.handle, level, format, type, size, mip_data); - } +void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, + std::span copies) { + const GLuint dst_name = dst_image.Handle(); + const GLuint src_name = src_image.Handle(); + const GLenum dst_target = ImageTarget(dst_image.info); + const GLenum src_target = ImageTarget(src_image.info); + for (const ImageCopy& copy : copies) { + const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); + const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); + const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); + glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y, + src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x, + dst_origin.y, dst_origin.z, region.width, region.height, region.depth); } } -void CachedSurface::UploadTexture(const std::vector& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Upload); - SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.emulated_levels; ++level) { - UploadTextureMipmap(level, staging_buffer); +bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { + if (dst.info.type == ImageType::e3D && 
dst.info.format == PixelFormat::BC4_UNORM) { + return false; } + return true; } -void CachedSurface::UploadTextureMipmap(u32 level, const std::vector& staging_buffer) { - glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); - const u8* buffer{staging_buffer.data() + mip_offset}; - if (is_compressed) { - const auto image_size{static_cast(params.GetHostMipmapSize(level))}; - switch (params.target) { - case SurfaceTarget::Texture2D: - glCompressedTextureSubImage2D(texture.handle, level, 0, 0, - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - static_cast(params.GetMipDepth(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::TextureCubemap: { - const std::size_t host_layer_size{params.GetHostLayerSize(level)}; - for (std::size_t face = 0; face < params.depth; ++face) { - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), 1, - internal_format, - static_cast(host_layer_size), buffer); - buffer += host_layer_size; - } - break; - } - default: - UNREACHABLE(); - } +void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, + std::span copies) { + if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { + ASSERT(src.info.type == ImageType::e3D); + util_shaders.CopyBC4(dst, src, copies); } else { - switch (params.target) { - case SurfaceTarget::Texture1D: - 
glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, - buffer); - break; - case SurfaceTarget::TextureBuffer: - ASSERT(level == 0); - glNamedBufferSubData(texture_buffer.handle, 0, - params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2D: - glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), - params.GetMipHeight(level), format, type, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D( - texture.handle, level, 0, 0, 0, static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - static_cast(params.GetMipDepth(level)), format, type, buffer); - break; - case SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < params.depth; ++face) { - glTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), - params.GetMipWidth(level), params.GetMipHeight(level), 1, - format, type, buffer); - buffer += params.GetHostLayerSize(level); - } - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); } } -void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); -} +void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + state_tracker.NotifyScissor0(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { - LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); + ASSERT(dst->BufferBits() == src->BufferBits()); + + glEnable(GL_FRAMEBUFFER_SRGB); + glDisable(GL_RASTERIZER_DISCARD); + glDisablei(GL_SCISSOR_TEST, 0); + + const GLbitfield 
buffer_bits = dst->BufferBits(); + const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; + const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; + glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, + src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, + dst_region[1].x, dst_region[1].y, buffer_bits, + is_linear ? GL_LINEAR : GL_NEAREST); } -View CachedSurface::CreateView(const ViewParams& view_key) { - return CreateViewInner(view_key, false); +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, + size_t buffer_offset, + std::span swizzles) { + switch (image.info.type) { + case ImageType::e2D: + return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); + case ImageType::e3D: + return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); + case ImageType::Linear: + return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); + default: + UNREACHABLE(); + break; + } } -View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { - auto view = std::make_shared(*this, view_key, is_proxy); - views[view_key] = view; - if (!is_proxy) - view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); - return view; +void TextureCacheRuntime::InsertUploadMemoryBarrier() { + glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, - bool is_proxy_) - : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, - target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { - if (!is_proxy_) { - main_view = CreateTextureView(); +FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const { + switch (type) { + case ImageType::e1D: + return 
format_properties[0].at(internal_format); + case ImageType::e2D: + case ImageType::Linear: + return format_properties[1].at(internal_format); + case ImageType::e3D: + return format_properties[2].at(internal_format); + default: + UNREACHABLE(); + return FormatProperties{}; } } -CachedSurfaceView::~CachedSurfaceView() = default; +TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} -void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { - ASSERT(params.num_levels == 1); +TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - if (params.target == SurfaceTarget::Texture3D) { - if (params.num_layers > 1) { - ASSERT(params.base_layer == 0); - glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); - } else { - glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, - params.base_level, params.base_layer); - } - return; +ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, + bool insert_fence) { + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? 
&syncs[index] : nullptr; + return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); +} + +size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional index = FindBuffer(requested_size); index) { + return *index; } - if (params.num_layers > 1) { - UNIMPLEMENTED_IF(params.base_layer != 0); - glFramebufferTexture(fb_target, attachment, GetTexture(), 0); - return; + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + glNamedBufferStorage(buffer.handle, requested_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast(glMapNamedBufferRange(buffer.handle, 0, requested_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + + syncs.emplace_back(); + sizes.push_back(requested_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { + size_t smallest_buffer = std::numeric_limits::max(); + std::optional found; + const size_t num_buffers = sizes.size(); + for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + GLint status; + glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); + if (status != GL_SIGNALED) { + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; } + return found; +} - const GLenum view_target = surface.GetTarget(); - const GLuint texture = surface.GetTexture(); - switch (surface.GetSurfaceParams().target) { - case SurfaceTarget::Texture1D: - glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); +Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, + VAddr cpu_addr_) + : 
VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { + if (CanBeAccelerated(runtime, info)) { + flags |= ImageFlagBits::AcceleratedUpload; + } + if (IsConverted(runtime.device, info.format, info.type)) { + flags |= ImageFlagBits::Converted; + gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; + gl_store_format = GL_RGBA8; + gl_format = GL_RGBA; + gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } else { + const auto& tuple = GetFormatTuple(info.format); + gl_internal_format = tuple.internal_format; + gl_store_format = tuple.store_format; + gl_format = tuple.format; + gl_type = tuple.type; + } + const GLenum target = ImageTarget(info); + const GLsizei width = info.size.width; + const GLsizei height = info.size.height; + const GLsizei depth = info.size.depth; + const int max_host_mip_levels = std::bit_width(info.size.width); + const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); + const GLsizei num_layers = info.resources.layers; + const GLsizei num_samples = info.num_samples; + + GLuint handle = 0; + if (target != GL_TEXTURE_BUFFER) { + texture.Create(target); + handle = texture.handle; + } + switch (target) { + case GL_TEXTURE_1D_ARRAY: + glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers); break; - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); + case GL_TEXTURE_2D_ARRAY: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers); break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, - params.base_layer); + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { + // TODO: Where should 'fixedsamplelocations' come from? 
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); + glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, + height >> samples_y, num_layers, GL_FALSE); + break; + } + case GL_TEXTURE_RECTANGLE: + glTextureStorage2D(handle, num_levels, gl_store_format, width, height); + break; + case GL_TEXTURE_3D: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + buffer.Create(); + glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); break; default: - UNIMPLEMENTED(); + UNREACHABLE_MSG("Invalid target=0x{:x}", target); + break; + } + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this); + glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, + static_cast(name.size()), name.data()); } } -GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { - if (GetSurfaceParams().IsBuffer()) { - return GetTexture(); - } - const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (current_swizzle == new_swizzle) { - return current_view; - } - current_swizzle = new_swizzle; +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); - const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); - OGLTextureView& view = entry->second; - if (!is_cache_miss) { - current_view = view.handle; - return view.handle; - } - view = CreateTextureView(); - current_view = view.handle; + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - std::array swizzle{x_source, y_source, z_source, w_source}; + u32 current_row_length = std::numeric_limits::max(); + u32 current_image_height = std::numeric_limits::max(); - switch (const 
PixelFormat pixel_format = GetSurfaceParams().pixel_format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - case PixelFormat::S8_UINT_D24_UNORM: - UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); - glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, - GetComponent(pixel_format, x_source == SwizzleSource::R)); - - // Make sure we sample the first component - std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { - return value == SwizzleSource::G ? SwizzleSource::R : value; - }); - [[fallthrough]]; - default: { - const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), - GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; - glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); - break; - } + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); + } + CopyBufferToImage(copy, buffer_offset); } - return view.handle; } -OGLTextureView CachedSurfaceView::CreateTextureView() const { - OGLTextureView texture_view; - texture_view.Create(); - - if (target == GL_TEXTURE_3D) { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, 0, 1); - } else { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, params.base_layer, params.num_layers); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + for (const VideoCommon::BufferCopy& copy : copies) { + glCopyNamedBufferSubData(map.Handle(), buffer.handle, 
copy.src_offset + buffer_offset, + copy.dst_offset, copy.size); } - ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); - - return texture_view; } -TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - StateTracker& state_tracker_) - : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()}, - state_tracker{state_tracker_} { - src_framebuffer.Create(); - dst_framebuffer.Create(); -} +void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API -TextureCacheOpenGL::~TextureCacheOpenGL() = default; + glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); + glPixelStorei(GL_PACK_ALIGNMENT, 1); -Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared(gpu_addr, params, is_astc_supported); -} + u32 current_row_length = std::numeric_limits::max(); + u32 current_image_height = std::numeric_limits::max(); -void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) { - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - if (src_params.type != dst_params.type) { - // A fallback is needed - return; + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); + } + CopyImageToBuffer(copy, buffer_offset); } - const auto src_handle = src_surface->GetTexture(); - const auto src_target = 
src_surface->GetTarget(); - const auto dst_handle = dst_surface->GetTexture(); - const auto dst_target = dst_surface->GetTarget(); - glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, - copy_params.source_y, copy_params.source_z, dst_handle, dst_target, - copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, - copy_params.dest_z, copy_params.width, copy_params.height, - copy_params.depth); } -void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) { - const auto& src_params{src_view->GetSurfaceParams()}; - const auto& dst_params{dst_view->GetSurfaceParams()}; - UNIMPLEMENTED_IF(src_params.depth != 1); - UNIMPLEMENTED_IF(dst_params.depth != 1); - - state_tracker.NotifyScissor0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); +void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + // Compressed formats don't have a pixel format or type + const bool is_compressed = gl_format == GL_NONE; + const void* const offset = reinterpret_cast(copy.buffer_offset + buffer_offset); - if (dst_params.srgb_conversion) { - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); + switch (info.type) { + case ImageType::e1D: + if (is_compressed) { + glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, + copy.image_subresource.num_layers, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e2D: + case ImageType::Linear: + if (is_compressed) { + 
glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width, + copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, + copy.image_subresource.base_layer, copy.image_extent.width, + copy.image_extent.height, copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e3D: + if (is_compressed) { + glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_offset.z, copy.image_extent.width, + copy.image_extent.height, copy.image_extent.depth, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, copy.image_offset.z, + copy.image_extent.width, copy.image_extent.height, + copy.image_extent.depth, gl_format, gl_type, offset); + } + break; + default: + UNREACHABLE(); } - glDisable(GL_RASTERIZER_DISCARD); - glDisablei(GL_SCISSOR_TEST, 0); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); - - GLenum buffers = 0; - if (src_params.type == SurfaceType::ColorTexture) { - src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +} - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + const GLint x_offset = copy.image_offset.x; + const GLsizei width = copy.image_extent.width; - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); + const GLint level = copy.image_subresource.base_level; + const GLsizei buffer_size = static_cast(copy.buffer_size); + void* const offset = reinterpret_cast(copy.buffer_offset + buffer_offset); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); + GLint y_offset = 0; + GLint z_offset = 0; + GLsizei height = 1; + GLsizei depth = 1; - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + switch (info.type) { + case ImageType::e1D: + y_offset = copy.image_subresource.base_layer; + height = copy.image_subresource.num_layers; + break; + case ImageType::e2D: + case ImageType::Linear: + y_offset = copy.image_offset.y; + z_offset = copy.image_subresource.base_layer; + height = copy.image_extent.height; + depth = copy.image_subresource.num_layers; + break; + case ImageType::e3D: + y_offset = copy.image_offset.y; + z_offset = copy.image_offset.z; + height = copy.image_extent.height; + depth = copy.image_extent.depth; + break; + default: + UNREACHABLE(); + } + // Compressed formats don't have a 
pixel format or type + const bool is_compressed = gl_format == GL_NONE; + if (is_compressed) { + glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, + height, depth, buffer_size, offset); + } else { + glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height, + depth, gl_format, gl_type, buffer_size, offset); } - - const Common::Rectangle& src_rect = copy_config.src_rect; - const Common::Rectangle& dst_rect = copy_config.dst_rect; - const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - - glBlitFramebuffer(static_cast(src_rect.left), static_cast(src_rect.top), - static_cast(src_rect.right), static_cast(src_rect.bottom), - static_cast(dst_rect.left), static_cast(dst_rect.top), - static_cast(dst_rect.right), static_cast(dst_rect.bottom), - buffers, - is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); } -void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { - MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image) + : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { + const Device& device = runtime.device; + if (True(image.flags & ImageFlagBits::Converted)) { + internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; + } else { + internal_format = GetFormatTuple(format).internal_format; + } + VideoCommon::SubresourceRange flatten_range = info.range; + std::array handles; + stored_views.reserve(2); - const auto source_format = GetFormatTuple(src_params.pixel_format); - const auto dest_format = GetFormatTuple(dst_params.pixel_format); + switch (info.type) { + case ImageViewType::e1DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e1D: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + break; + case ImageViewType::e2DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e2D: + if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { + // 2D and 2D array views on a 3D textures are used exclusively for render targets + ASSERT(info.range.extent.levels == 1); + const VideoCommon::SubresourceRange slice_range{ + .base = {.level = info.range.base.level, .layer = 0}, + .extent = {.levels = 1, .layers = 1}, + }; + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); + break; + } + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); + break; + case ImageViewType::e3D: + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + break; + case ImageViewType::CubeArray: + flatten_range.extent.layers = 6; + [[fallthrough]]; + case ImageViewType::Cube: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + break; + case ImageViewType::Rect: + 
glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + break; + case ImageViewType::Buffer: + glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); + SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + break; + } + default_handle = Handle(info.type); +} - const std::size_t source_size = src_surface->GetHostSizeInBytes(); - const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) + : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} - const std::size_t buffer_size = std::max(source_size, dest_size); +void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, + GLuint handle, const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range) { + if (info.type == ImageViewType::Buffer) { + // TODO: Take offset from buffer cache + glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, + image.guest_size_bytes); + } else { + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); + } + } + if (device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this, view_type); + glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + } + stored_views.emplace_back().handle = handle; + views[static_cast(view_type)] = handle; +} - GLuint copy_pbo_handle = FetchPBO(buffer_size); +Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { + const GLenum compare_mode = config.depth_compare_enabled ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE; + const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func); + const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None); + const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter); + const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter); + const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE; + + UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); + UNIMPLEMENTED_IF(config.float_coord_normalization != 0); + + sampler.Create(); + const GLuint handle = sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); + glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + + if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + } else { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); + } + if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { + glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); + } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { + LOG_WARNING(Render_OpenGL, 
"GL_ARB_texture_filter_minmax is required"); + } + if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { + glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); + } else if (seamless == GL_FALSE) { + // We default to false because it's more common + LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + } +} - glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of + // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared + // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with + // mismatching size, this is why core framebuffers are preferred. + GLuint handle; + glGenFramebuffers(1, &handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + + GLsizei num_buffers = 0; + std::array gl_draw_buffers; + gl_draw_buffers.fill(GL_NONE); + + for (size_t index = 0; index < color_buffers.size(); ++index) { + const ImageView* const image_view = color_buffers[index]; + if (!image_view) { + continue; + } + buffer_bits |= GL_COLOR_BUFFER_BIT; + gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index]; + num_buffers = static_cast(index + 1); - if (src_surface->IsCompressed()) { - glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast(source_size), - nullptr); - } else { - glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, - static_cast(source_size), nullptr); + const GLenum attachment = static_cast(GL_COLOR_ATTACHMENT0 + index); + AttachTexture(handle, attachment, image_view); } - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); + if (const ImageView* const image_view = 
depth_buffer; image_view) { + if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { + buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } else { + buffer_bits |= GL_DEPTH_BUFFER_BIT; + } + const GLenum attachment = AttachmentType(image_view->format); + AttachTexture(handle, attachment, image_view); + } - const GLsizei width = static_cast(dst_params.width); - const GLsizei height = static_cast(dst_params.height); - const GLsizei depth = static_cast(dst_params.depth); - if (dst_surface->IsCompressed()) { - LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); - UNREACHABLE(); + if (num_buffers > 1) { + glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data()); + } else if (num_buffers > 0) { + glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]); } else { - switch (dst_params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::TextureCubemap: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target); - UNREACHABLE(); - } + glNamedFramebufferDrawBuffer(handle, GL_NONE); } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glTextureBarrier(); -} + glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width); + glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, 
key.size.height); + // TODO + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...); -GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { - ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); - const u32 l2 = Common::Log2Ceil64(static_cast(buffer_size)); - OGLBuffer& cp = copy_pbo_cache[l2]; - if (cp.handle == 0) { - const std::size_t ceil_size = 1ULL << l2; - cp.Create(); - cp.MakeStreamCopy(ceil_size); + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(key); + glObjectLabel(GL_FRAMEBUFFER, handle, static_cast(name.size()), name.data()); } - return cp.handle; + framebuffer.handle = handle; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 72b284fab..04193e31e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -4,157 +4,247 @@ #pragma once -#include -#include #include -#include -#include -#include +#include #include -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; - -class CachedSurfaceView; -class CachedSurface; -class TextureCacheOpenGL; +class Device; +class ProgramManager; class StateTracker; -using Surface = std::shared_ptr; -using View = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCache; +class Framebuffer; +class Image; +class ImageView; +class Sampler; -class CachedSurface final : public 
VideoCommon::SurfaceBase { - friend CachedSurfaceView; +using VideoCommon::ImageId; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +class ImageBufferMap { public: - explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_); - ~CachedSurface(); - - void UploadTexture(const std::vector& staging_buffer) override; - void DownloadTexture(std::vector& staging_buffer) override; + explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); + ~ImageBufferMap(); - GLenum GetTarget() const { - return target; + GLuint Handle() const noexcept { + return handle; } - GLuint GetTexture() const { - return texture.handle; + std::span Span() const noexcept { + return span; } - bool IsCompressed() const { - return is_compressed; +private: + std::span span; + OGLSync* sync; + GLuint handle; +}; + +struct FormatProperties { + GLenum compatibility_class; + bool compatibility_by_size; + bool is_compressed; +}; + +class TextureCacheRuntime { + friend Framebuffer; + friend Image; + friend ImageView; + friend Sampler; + +public: + explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, + StateTracker& state_tracker); + ~TextureCacheRuntime(); + + void Finish(); + + ImageBufferMap MapUploadBuffer(size_t size); + + ImageBufferMap MapDownloadBuffer(size_t size); + + void CopyImage(Image& dst, Image& src, std::span copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + UNIMPLEMENTED(); } -protected: - void DecorateSurfaceName() override; + bool CanImageBeCopied(const Image& dst, const Image& src); + + void EmulateCopyImage(Image& dst, Image& src, std::span copies); + + void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + 
Tegra::Engines::Fermi2D::Operation operation); - View CreateView(const ViewParams& view_key) override; - View CreateViewInner(const ViewParams& view_key, bool is_proxy); + void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void InsertUploadMemoryBarrier(); + + FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; private: - void UploadTextureMipmap(u32 level, const std::vector& staging_buffer); + struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); - GLenum internal_format{}; - GLenum format{}; - GLenum type{}; - bool is_compressed{}; - GLenum target{}; - u32 view_count{}; + ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - OGLTexture texture; - OGLBuffer texture_buffer; + size_t RequestBuffer(size_t requested_size); + + std::optional FindBuffer(size_t requested_size); + + std::vector syncs; + std::vector buffers; + std::vector maps; + std::vector sizes; + GLenum storage_flags; + GLenum map_flags; + }; + + const Device& device; + StateTracker& state_tracker; + UtilShaders util_shaders; + + std::array, 3> format_properties; + + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; + + OGLTexture null_image_1d_array; + OGLTexture null_image_cube_array; + OGLTexture null_image_3d; + OGLTexture null_image_rect; + OGLTextureView null_image_view_1d; + OGLTextureView null_image_view_2d; + OGLTextureView null_image_view_2d_array; + OGLTextureView null_image_view_cube; + + std::array null_image_views; }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class Image : public VideoCommon::ImageBase { + friend ImageView; + public: - explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_); - ~CachedSurfaceView(); + explicit 
Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, + VAddr cpu_addr); - /// @brief Attaches this texture view to the currently bound fb_target framebuffer - /// @param attachment Attachment to bind textures to - /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) - void Attach(GLenum attachment, GLenum fb_target) const; + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); - GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); - void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); + void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span copies); - void MarkAsModified(u64 tick) { - surface.MarkAsModified(true, tick); + GLuint Handle() const noexcept { + return texture.handle; } - GLuint GetTexture() const { - if (is_proxy) { - return surface.GetTexture(); - } - return main_view.handle; +private: + void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + + void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + + OGLTexture texture; + OGLTextureView store_view; + OGLBuffer buffer; + GLenum gl_internal_format = GL_NONE; + GLenum gl_store_format = GL_NONE; + GLenum gl_format = GL_NONE; + GLenum gl_type = GL_NONE; +}; + +class ImageView : public VideoCommon::ImageViewBase { + friend Image; + +public: + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + + [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { + return views[static_cast(query_type)]; } - GLenum GetFormat() const { - return format; + [[nodiscard]] GLuint 
DefaultHandle() const noexcept { + return default_handle; } - const SurfaceParams& GetSurfaceParams() const { - return surface.GetSurfaceParams(); + [[nodiscard]] GLenum Format() const noexcept { + return internal_format; } private: - OGLTextureView CreateTextureView() const; + void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range); + + std::array views{}; + std::vector stored_views; + GLuint default_handle = 0; + GLenum internal_format = GL_NONE; +}; + +class ImageAlloc : public VideoCommon::ImageAllocBase {}; - CachedSurface& surface; - const GLenum format; - const GLenum target; - const bool is_proxy; +class Sampler { +public: + explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - std::unordered_map view_cache; - OGLTextureView main_view; + GLuint Handle() const noexcept { + return sampler.handle; + } - // Use an invalid default so it always fails the comparison test - u32 current_swizzle = 0xffffffff; - GLuint current_view = 0; +private: + OGLSampler sampler; }; -class TextureCacheOpenGL final : public TextureCacheBase { +class Framebuffer { public: - explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - StateTracker& state_tracker); - ~TextureCacheOpenGL(); - -protected: - Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - - void ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) override; + explicit Framebuffer(TextureCacheRuntime&, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); - void ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) override; + [[nodiscard]] GLuint Handle() const noexcept { + return framebuffer.handle; + } - 
void BufferCopy(Surface& src_surface, Surface& dst_surface) override; + [[nodiscard]] GLbitfield BufferBits() const noexcept { + return buffer_bits; + } private: - GLuint FetchPBO(std::size_t buffer_size); - - StateTracker& state_tracker; + OGLFramebuffer framebuffer; + GLbitfield buffer_bits = GL_NONE; +}; - OGLFramebuffer src_framebuffer; - OGLFramebuffer dst_framebuffer; - std::unordered_map copy_pbo_cache; +struct TextureCacheParams { + static constexpr bool ENABLE_VALIDATION = true; + static constexpr bool FRAMEBUFFER_BLITS = true; + static constexpr bool HAS_EMULATED_COPIES = true; + + using Runtime = OpenGL::TextureCacheRuntime; + using Image = OpenGL::Image; + using ImageAlloc = OpenGL::ImageAlloc; + using ImageView = OpenGL::ImageView; + using Sampler = OpenGL::Sampler; + using Framebuffer = OpenGL::Framebuffer; }; +using TextureCache = VideoCommon::TextureCache; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index dd4ee3361..cbccfdeb4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { return GL_FILL; } +inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) { + switch (filter) { + case Tegra::Texture::SamplerReduction::WeightedAverage: + return GL_WEIGHTED_AVERAGE_ARB; + case Tegra::Texture::SamplerReduction::Min: + return GL_MIN; + case Tegra::Texture::SamplerReduction::Max: + return GL_MAX; + } + UNREACHABLE_MSG("Invalid reduction filter={}", static_cast(filter)); + return GL_WEIGHTED_AVERAGE_ARB; +} + inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { // Enumeration order matches register order. We can convert it arithmetically. 
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast(swizzle); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cbfaaa99c..dd77a543c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -23,10 +23,10 @@ #include "core/telemetry_session.h" #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/textures/decoders.h" namespace OpenGL { @@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - PrepareRendertarget(framebuffer); RenderScreenshot(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + state_tracker.BindFramebuffer(0); DrawScreen(emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; - const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; - u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; - rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); - // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; - VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, - framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, - 
gl_framebuffer_data.data(), host_ptr); - + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + const u64 size_in_bytes{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; + const std::span input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, + 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); // Update existing texture @@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() { glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + // Generate presentation sampler + present_sampler.Create(); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + // Generate VBO handle for drawing vertex_buffer.Create(); @@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); @@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const auto pixel_format{ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - 
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); GLint internal_format; @@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, internal_format = GL_RGBA8; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - static_cast(framebuffer.pixel_format)); + // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + // static_cast(framebuffer.pixel_format)); } texture.resource.Release(); @@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyPolygonModes(); state_tracker.NotifyViewport0(); state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask0(); + state_tracker.NotifyColorMask(0); state_tracker.NotifyBlend0(); state_tracker.NotifyFramebuffer(); state_tracker.NotifyFrontFace(); @@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } glBindTextureUnit(0, screen_info.display_texture); - glBindSampler(0, 0); + glBindSampler(0, present_sampler.handle); glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); @@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() { DrawScreen(layout); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, renderer_settings.screenshot_bits); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 376f88766..44e109794 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -102,6 +102,7 @@ private: StateTracker state_tracker{gpu}; // OpenGL object IDs + OGLSampler present_sampler; 
OGLBuffer vertex_buffer; OGLProgram vertex_program; OGLProgram fragment_program; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp new file mode 100644 index 000000000..eb849cbf2 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -0,0 +1,224 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" +#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/opengl_copy_bc4_comp.h" +#include "video_core/host_shaders/pitch_unswizzle_comp.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +using namespace HostShaders; + +using VideoCommon::Extent3D; +using VideoCommon::ImageCopy; +using VideoCommon::ImageType; +using VideoCommon::SwizzleParameters; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; +using VideoCore::Surface::BytesPerBlock; + +namespace { + +OGLProgram MakeProgram(std::string_view source) { + OGLShader shader; + shader.Create(source, GL_COMPUTE_SHADER); + + OGLProgram program; + program.Create(true, false, shader.handle); + return program; +} + +} // Anonymous namespace + +UtilShaders::UtilShaders(ProgramManager& program_manager_) + : 
program_manager{program_manager_}, + block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), + block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), + pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), + copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { + const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); + swizzle_table_buffer.Create(); + glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); +} + +UtilShaders::~UtilShaders() = default; + +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + + const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.layer_stride); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, 
params.block_height_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; + + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); + + const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.slice_size); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glUniform1ui(8, 
params.block_depth); + glUniform1ui(9, params.block_depth_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); + } + program_manager.RestoreGuestCompute(); +} +/* Uploads pitch data into image with the pitch unswizzle compute program. The uniforms and the output image (level 0, non-layered, write-only) are bound once before the loop; each entry of swizzles (iterated as const SwizzleParameters&) only rebinds the input range of map and dispatches with a Z group count of 1. The guest compute program is restored before returning. NOTE(review): swizzle.level is never read here and level 0 is always bound - presumably pitch images are expected to have a single level; confirm. */ +void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_INPUT_BUFFER = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + static constexpr GLuint LOC_ORIGIN = 0; + static constexpr GLuint LOC_DESTINATION = 1; + static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; + static constexpr GLuint LOC_PITCH = 3; +/* Byte size of one block, the store format matching that size, and the pitch taken from image.info. */ + const u32 bytes_per_block = BytesPerBlock(image.info.format); + const GLenum format = StoreFormat(bytes_per_block); + const u32 pitch = image.info.pitch; +/* Flags the case where bytes_per_block is not a power of two as unimplemented. */ + UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), + "Non-power of two images are not implemented"); +/* Bind the host program, flush image.guest_size_bytes bytes of the mapped buffer starting at buffer_offset, then set the loop-invariant state: origin and destination are fixed at zero. */ + program_manager.BindHostCompute(pitch_unswizzle_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glUniform2ui(LOC_ORIGIN, 0, 0); + glUniform2i(LOC_DESTINATION, 0, 0); + glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); + glUniform1ui(LOC_PITCH, pitch); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; +/* Number of workgroups needed to cover num_tiles, rounded up to whole WORKGROUP_SIZE groups. */ + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); +/* The bound input range runs from this swizzle's data to the end of the image's guest data. */ + glBindBufferRange(GL_SHADER_STORAGE_BUFFER,
BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); + } + program_manager.RestoreGuestCompute(); +} +/* Runs the BC4 copy compute program once per entry of copies (iterated as const ImageCopy&). src_image is read through image unit 0 as GL_RG32UI and dst_image is written through image unit 1 as GL_RGBA8UI, each at the base level named by the copy. The guest compute program is restored before returning. */ +void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span copies) { + static constexpr GLuint BINDING_INPUT_IMAGE = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; + static constexpr GLuint LOC_SRC_OFFSET = 0; + static constexpr GLuint LOC_DST_OFFSET = 1; +/* The program is bound once; all per-copy state is set inside the loop. */ + program_manager.BindHostCompute(copy_bc4_program.handle); +/* Every copy is asserted to address exactly one layer starting at layer 0, on both source and destination. */ + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_subresource.base_layer == 0); + ASSERT(copy.src_subresource.num_layers == 1); + ASSERT(copy.dst_subresource.base_layer == 0); + ASSERT(copy.dst_subresource.num_layers == 1); +/* Offsets are passed as uniforms; copy.extent is used directly as the workgroup counts of the dispatch. */ + glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); + glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, + GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), + copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); + glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); + } + program_manager.RestoreGuestCompute(); +} +/* Returns the unsigned-integer image format whose texel size equals bytes_per_block (1, 2, 4, 8 or 16 bytes). Any other value reaches UNREACHABLE() and then falls back to GL_R8UI. */ +GLenum StoreFormat(u32 bytes_per_block) { + switch (bytes_per_block) { + case 1: + return GL_R8UI; + case 2: + return GL_R16UI; + case 4: + return GL_R32UI; + case 8: + return GL_RG32UI; + case 16: + return GL_RGBA32UI; + } + UNREACHABLE(); + return GL_R8UI; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h new file mode 100644 index 000000000..359997255 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the
license.txt file included. + +#pragma once + +#include + +#include + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { +/* Forward declarations: these types are only used by reference in this header. */ +class Image; +class ImageBufferMap; +class ProgramManager; +/* Owns the host compute programs used for texture uploads and BC4 copies, plus the swizzle table buffer, and exposes one entry point per program. */ +class UtilShaders { +public: + explicit UtilShaders(ProgramManager& program_manager); + ~UtilShaders(); +/* Runs the 2D block-linear unswizzle program once per entry of swizzles, reading from map (offset by buffer_offset) and writing into image. */ + void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); +/* Runs the 3D block-linear unswizzle program once per entry of swizzles, reading from map (offset by buffer_offset) and writing into image. */ + void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); +/* Runs the pitch unswizzle program once per entry of swizzles, reading from map (offset by buffer_offset) and writing into level 0 of image. */ + void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); +/* Runs the BC4 copy program once per entry of copies, reading src_image and writing dst_image. */ + void CopyBC4(Image& dst_image, Image& src_image, + std::span copies); + +private: + ProgramManager& program_manager; +/* Buffer created in the constructor from Tegra::Texture::MakeSwizzleTable() and bound as a shader storage buffer by the block-linear uploads. */ + OGLBuffer swizzle_table_buffer; +/* One compute program per operation, built with MakeProgram in the constructor. */ + OGLProgram block_linear_unswizzle_2d_program; + OGLProgram block_linear_unswizzle_3d_program; + OGLProgram pitch_unswizzle_program; + OGLProgram copy_bc4_program; +}; +/* Maps a block size of 1, 2, 4, 8 or 16 bytes to an unsigned-integer GL image format of the same texel size. */ +GLenum StoreFormat(u32 bytes_per_block); + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp deleted file mode 100644 index 6d7bb16b2..000000000 --- a/src/video_core/renderer_opengl/utils.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included.
- -#include -#include - -#include -#include - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { - if (!GLAD_GL_KHR_debug) { - // We don't need to throw an error as this is just for debugging - return; - } - - std::string object_label; - if (extra_info.empty()) { - switch (identifier) { - case GL_TEXTURE: - object_label = fmt::format("Texture@0x{:016X}", addr); - break; - case GL_PROGRAM: - object_label = fmt::format("Shader@0x{:016X}", addr); - break; - default: - object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); - break; - } - } else { - object_label = fmt::format("{}@0x{:016X}", extra_info, addr); - } - glObjectLabel(identifier, handle, -1, static_cast(object_label.c_str())); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h deleted file mode 100644 index 9c09ee12c..000000000 --- a/src/video_core/renderer_opengl/utils.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include "common/common_types.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); - -} // namespace OpenGL -- cgit v1.2.3