diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 13 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 80 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 106 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 94 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.cpp | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 15 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 322 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 50 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 185 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 11 |
10 files changed, 649 insertions, 232 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 187a28e4d..d4dd10bb6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -5,6 +5,7 @@ #include <algorithm> #include <span> +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" @@ -229,8 +230,10 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff .padding = 0, }; buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); - glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, - reinterpret_cast<const GLuint*>(&ssbo)); + glProgramLocalParametersI4uivNV( + PROGRAM_LUT[stage], + Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, + reinterpret_cast<const GLuint*>(&ssbo)); } } @@ -250,8 +253,10 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf .padding = 0, }; buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); - glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, - reinterpret_cast<const GLuint*>(&ssbo)); + glProgramLocalParametersI4uivNV( + GL_COMPUTE_PROGRAM_NV, + Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, + reinterpret_cast<const GLuint*>(&ssbo)); } } diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index aa1cc592f..5c1f21c65 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -19,15 +19,6 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; -template <typename Range> -u32 AccumulateCount(const Range& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - size_t ComputePipelineKey::Hash() const noexcept { return static_cast<size_t>( Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); @@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); - num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); - num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); + num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); - const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; + const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); - const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; + const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); const bool is_glasm{assembly_program.handle != 0}; - const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)}; use_storage_buffers = !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); writes_global_memory = !use_storage_buffers && @@ -88,8 +79,7 @@ void ComputePipeline::Configure() { } texture_cache.SynchronizeComputeDescriptors(); - std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; - boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; std::array<GLuint, MAX_TEXTURES> samplers; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; @@ -119,33 +109,39 @@ void ComputePipeline::Configure() { } return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({ + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }); } }}; for (const auto& desc : info.texture_buffer_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); samplers[sampler_binding++] = 0; } } - std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc, false); + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers[sampler_binding++] = sampler->Handle(); } } - std::ranges::for_each(info.image_descriptors, add_image); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); + for (const auto& desc : info.image_descriptors) { + add_image(desc, desc.is_written); + } + texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); @@ -161,7 +157,7 @@ void ComputePipeline::Configure() { if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)}; buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); @@ -177,23 +173,45 @@ void ComputePipeline::Configure() { buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); - const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + + num_image_buffers}; texture_binding += num_texture_buffers; image_binding += num_image_buffers; + u32 texture_scaling_mask{}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; + textures[texture_binding] = image_view.Handle(desc.type); + if (texture_cache.IsRescaling(image_view)) { + texture_scaling_mask |= 1u << texture_binding; + } + ++texture_binding; } } + u32 image_scaling_mask{}; for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } - images[image_binding++] = image_view.StorageView(desc.type, desc.format); + images[image_binding] = image_view.StorageView(desc.type, desc.format); + if (texture_cache.IsRescaling(image_view)) { + image_scaling_mask |= 1u << image_binding; + } + ++image_binding; + } + } + if (info.uses_rescaling_uniform) { + const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)}; + const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)}; + if (assembly_program.handle != 0) { + glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask, + float_image_scaling_mask, 0.0f, 0.0f); + } else { + glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask, + float_image_scaling_mask, 0.0f, 0.0f); } } if (texture_binding != 0) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index bccb37a58..f8495896c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,7 +15,7 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_notify.h" -#include "video_core/texture_cache/texture_cache_base.h" +#include "video_core/texture_cache/texture_cache.h" #if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] @@ -27,6 +27,7 @@ namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; using Shader::ImageDescriptor; +using Shader::NumDescriptors; using Shader::TextureBufferDescriptor; using Shader::TextureDescriptor; using Tegra::Texture::TexturePair; @@ -35,15 +36,6 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; -template <typename Range> -u32 AccumulateCount(const Range& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline( base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + base_uniform_bindings[stage + 1] += NumDescriptors(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors); } enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; num_textures += num_tex_buffer_bindings; - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)}; num_image_buffers[stage] += num_img_buffers_bindings; num_images += num_img_buffers_bindings; - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + num_textures += NumDescriptors(info.texture_descriptors); + num_images += NumDescriptors(info.image_descriptors); + num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors); writes_global_memory |= std::ranges::any_of( info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); @@ -288,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline( template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { - std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; - std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; std::array<GLuint, MAX_TEXTURES> samplers; - size_t image_view_index{}; + size_t views_index{}; GLsizei sampler_binding{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -336,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = { + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }; } }}; if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = {handle.first}; samplers[sampler_binding++] = 0; } } } if constexpr (Spec::has_image_buffers) { for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); + add_image(desc, false); } } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = {handle.first}; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_binding++] = sampler->Handle(); @@ -367,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { - add_image(desc); + add_image(desc, desc.is_written); } } }}; @@ -386,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - const std::span indices_span(image_view_indices.data(), image_view_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index)); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - ImageId* texture_buffer_index{image_view_ids.data()}; + VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { @@ -402,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); ++index; - ++texture_buffer_index; + ++texture_buffer_it; } }}; const Shader::Info& info{stage_infos[stage]}; @@ -423,13 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_buffer(desc); } } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors); if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_it += Shader::NumDescriptors(info.image_descriptors); } }}; if constexpr (Spec::enabled_stages[0]) { @@ -453,12 +443,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (!is_built.load(std::memory_order::relaxed)) { WaitForBuild(); } - if (assembly_programs[0].handle != 0) { + const bool use_assembly{assembly_programs[0].handle != 0}; + if (use_assembly) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindSourcePrograms(source_programs); } - const ImageId* views_it{image_view_ids.data()}; + const VideoCommon::ImageViewInOut* views_it{views.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; std::array<GLuint, MAX_TEXTURES> textures; @@ -473,20 +464,49 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { views_it += num_texture_buffers[stage]; views_it += num_image_buffers[stage]; + u32 texture_scaling_mask{}; + u32 image_scaling_mask{}; + u32 stage_texture_binding{}; + u32 stage_image_binding{}; + const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; + textures[texture_binding] = image_view.Handle(desc.type); + if (texture_cache.IsRescaling(image_view)) { + texture_scaling_mask |= 1u << stage_texture_binding; + } + ++texture_binding; + ++stage_texture_binding; } } for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } - images[image_binding++] = image_view.StorageView(desc.type, desc.format); + images[image_binding] = image_view.StorageView(desc.type, desc.format); + if (texture_cache.IsRescaling(image_view)) { + image_scaling_mask |= 1u << stage_image_binding; + } + ++image_binding; + ++stage_image_binding; + } + } + if (info.uses_rescaling_uniform) { + const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)}; + const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)}; + const bool is_rescaling{texture_cache.IsRescaling()}; + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const f32 down_factor{is_rescaling ? config_down_factor : 1.0f}; + if (use_assembly) { + glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask, + float_image_scaling_mask, down_factor, 0.0f); + } else { + glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask, + float_image_scaling_mask, down_factor, 0.0f); } } }}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a6d9f7c43..9b516c64f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -184,6 +184,10 @@ void RasterizerOpenGL::Clear() { SyncRasterizeEnable(); SyncStencilTestState(); + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + SyncViewport(); if (regs.clear_flags.scissor) { SyncScissorTest(); } else { @@ -192,10 +196,6 @@ void RasterizerOpenGL::Clear() { } UNIMPLEMENTED_IF(regs.clear_flags.viewport); - std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UpdateRenderTargets(true); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } @@ -214,8 +214,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - SyncState(); - GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; if (!pipeline) { return; @@ -223,6 +221,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); + SyncState(); + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(pipeline, primitive_mode); @@ -533,7 +533,8 @@ void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; - const bool dirty_viewport = flags[Dirty::Viewports]; + const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports]; + const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports; const bool dirty_clip_control = flags[Dirty::ClipControl]; if (dirty_clip_control || flags[Dirty::FrontFace]) { @@ -553,8 +554,7 @@ void RasterizerOpenGL::SyncViewport() { } glFrontFace(mode); } - - if (dirty_viewport || flags[Dirty::ClipControl]) { + if (dirty_viewport || dirty_clip_control) { flags[Dirty::ClipControl] = false; bool flip_y = false; @@ -570,37 +570,58 @@ void RasterizerOpenGL::SyncViewport() { state_tracker.ClipControl(origin, depth); state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); } + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + const auto conv = [scale](float value) -> GLfloat { + float new_value = value * scale; + if (scale < 1.0f) { + const bool sign = std::signbit(value); + new_value = std::round(std::abs(new_value)); + new_value = sign ? -new_value : new_value; + } + return static_cast<GLfloat>(new_value); + }; if (dirty_viewport) { flags[Dirty::Viewports] = false; - const bool force = flags[Dirty::ViewportTransform]; + const bool force = flags[Dirty::ViewportTransform] || rescale_viewports; flags[Dirty::ViewportTransform] = false; + flags[VideoCommon::Dirty::RescaleViewports] = false; - for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { - if (!force && !flags[Dirty::Viewport0 + i]) { + for (size_t index = 0; index < Maxwell::NumViewports; ++index) { + if (!force && !flags[Dirty::Viewport0 + index]) { continue; } - flags[Dirty::Viewport0 + i] = false; + flags[Dirty::Viewport0 + index] = false; - const auto& src = regs.viewport_transform[i]; - const Common::Rectangle<f32> rect{src.GetRect()}; - glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(), - rect.GetHeight()); + const auto& src = regs.viewport_transform[index]; + GLfloat x = conv(src.translate_x - src.scale_x); + GLfloat y = conv(src.translate_y - src.scale_y); + GLfloat width = conv(src.scale_x * 2.0f); + GLfloat height = conv(src.scale_y * 2.0f); + + if (height < 0) { + y += height; + height = -height; + } + glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f, + height != 0.0f ? height : 1.0f); const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; const GLdouble far_depth = src.translate_z + src.scale_z; if (device.HasDepthBufferFloat()) { - glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth); + glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth); } else { - glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth); } if (!GLAD_GL_NV_viewport_swizzle) { continue; } - glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x), + glViewportSwizzleNV(static_cast<GLuint>(index), + MaxwellToGL::ViewportSwizzle(src.swizzle.x), MaxwellToGL::ViewportSwizzle(src.swizzle.y), MaxwellToGL::ViewportSwizzle(src.swizzle.z), MaxwellToGL::ViewportSwizzle(src.swizzle.w)); @@ -903,14 +924,34 @@ void RasterizerOpenGL::SyncLogicOpState() { void RasterizerOpenGL::SyncScissorTest() { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::Scissors]) { + if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) { return; } flags[Dirty::Scissors] = false; + const bool force = flags[VideoCommon::Dirty::RescaleScissors]; + flags[VideoCommon::Dirty::RescaleScissors] = false; + const auto& regs = maxwell3d.regs; + + const auto& resolution = Settings::values.resolution_info; + const bool is_rescaling{texture_cache.IsRescaling()}; + const u32 up_scale = is_rescaling ? resolution.up_scale : 1U; + const u32 down_shift = is_rescaling ? resolution.down_shift : 0U; + const auto scale_up = [up_scale, down_shift](u32 value) -> u32 { + if (value == 0) { + return 0U; + } + const u32 upset = value * up_scale; + u32 acumm{}; + if ((up_scale >> down_shift) == 0) { + acumm = upset % 2; + } + const u32 converted_value = upset >> down_shift; + return std::max<u32>(converted_value + acumm, 1U); + }; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { - if (!flags[Dirty::Scissor0 + index]) { + if (!force && !flags[Dirty::Scissor0 + index]) { continue; } flags[Dirty::Scissor0 + index] = false; @@ -918,8 +959,8 @@ void RasterizerOpenGL::SyncScissorTest() { const auto& src = regs.scissor_test[index]; if (src.enable) { glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); - glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y, - src.max_x - src.min_x, src.max_y - src.min_y); + glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y), + scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y)); } else { glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); } @@ -935,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() { oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - - glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale)); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 8695c29e3..5e7101d28 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -166,7 +166,12 @@ void OGLFramebuffer::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of + // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared + // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with + // mismatching size, this is why core framebuffers are preferred. glGenFramebuffers(1, &handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); } void OGLFramebuffer::Release() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 02682bd76..42ef67628 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -426,16 +426,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); - for (const auto& desc : programs[index].info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; - } + total_storage_buffers += + Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors); } else { // VertexB path when VertexA is present. auto& program_va{programs[0]}; auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - for (const auto& desc : program_vb.info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; - } + total_storage_buffers += + Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors); programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -510,10 +508,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - u32 num_storage_buffers{}; - for (const auto& desc : program.info.storage_buffers_descriptors) { - num_storage_buffers += desc.count; - } + const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 8c3ca3d82..2f7d98d8b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,8 +9,8 @@ #include <glad/glad.h> +#include "common/literals.h" #include "common/settings.h" - #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" @@ -42,6 +42,7 @@ using VideoCore::Surface::IsPixelFormatSRGB; using VideoCore::Surface::MaxPixelFormat; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; struct CopyOrigin { GLint level; @@ -316,6 +317,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { + const GLenum target = ImageTarget(info); + const GLsizei width = info.size.width; + const GLsizei height = info.size.height; + const GLsizei depth = info.size.depth; + const int max_host_mip_levels = std::bit_width(info.size.width); + const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); + const GLsizei num_layers = info.resources.layers; + const GLsizei num_samples = info.num_samples; + + GLuint handle = 0; + OGLTexture texture; + if (target != GL_TEXTURE_BUFFER) { + texture.Create(target); + handle = texture.handle; + } + switch (target) { + case GL_TEXTURE_1D_ARRAY: + glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); + break; + case GL_TEXTURE_2D_ARRAY: + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); + break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { + // TODO: Where should 'fixedsamplelocations' come from? + const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); + glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, + height >> samples_y, num_layers, GL_FALSE); + break; + } + case GL_TEXTURE_RECTANGLE: + glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); + break; + case GL_TEXTURE_3D: + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + UNREACHABLE(); + break; + default: + UNREACHABLE_MSG("Invalid target=0x{:x}", target); + break; + } + return texture; +} + [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { switch (format) { case PixelFormat::B5G6R5_UNORM: @@ -359,7 +406,8 @@ ImageBufferMap::~ImageBufferMap() { TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, StateTracker& state_tracker_) - : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { + : device{device_}, state_tracker{state_tracker_}, + util_shaders(program_manager), resolution{Settings::values.resolution_info} { static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; @@ -426,6 +474,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); + + if (resolution.active) { + for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) { + rescale_draw_fbos[i].Create(); + rescale_read_fbos[i].Create(); + } + } } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -442,6 +497,15 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { return download_buffers.RequestMap(size, false); } +u64 TextureCacheRuntime::GetDeviceLocalMemory() const { + if (GLAD_GL_NVX_gpu_memory_info) { + GLint cur_avail_mem_kb = 0; + glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb); + return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; + } + return 2_GiB; // Return minimum requirements +} + void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) { const GLuint dst_name = dst_image.Handle(); @@ -605,13 +669,13 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req return found; } -Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, +Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) - : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { - if (CanBeAccelerated(runtime, info)) { + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { + if (CanBeAccelerated(*runtime, info)) { flags |= ImageFlagBits::AcceleratedUpload; } - if (IsConverted(runtime.device, info.format, info.type)) { + if (IsConverted(runtime->device, info.format, info.type)) { flags |= ImageFlagBits::Converted; gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; gl_format = GL_RGBA; @@ -622,58 +686,25 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = tuple.format; gl_type = tuple.type; } - const GLenum target = ImageTarget(info); - const GLsizei width = info.size.width; - const GLsizei height = info.size.height; - const GLsizei depth = info.size.depth; - const int max_host_mip_levels = std::bit_width(info.size.width); - const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); - const GLsizei num_layers = info.resources.layers; - const GLsizei num_samples = info.num_samples; - - GLuint handle = 0; - if (target != GL_TEXTURE_BUFFER) { - texture.Create(target); - handle = texture.handle; - } - switch (target) { - case GL_TEXTURE_1D_ARRAY: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); - break; - case GL_TEXTURE_2D_ARRAY: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); - break; - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { - // TODO: Where should 'fixedsamplelocations' come from? - const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); - glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, - height >> samples_y, num_layers, GL_FALSE); - break; - } - case GL_TEXTURE_RECTANGLE: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); - break; - case GL_TEXTURE_3D: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); - break; - case GL_TEXTURE_BUFFER: - UNREACHABLE(); - break; - default: - UNREACHABLE_MSG("Invalid target=0x{:x}", target); - break; - } - if (runtime.device.HasDebuggingToolAttached()) { + texture = MakeImage(info, gl_internal_format); + current_texture = texture.handle; + if (runtime->device.HasDebuggingToolAttached()) { const std::string name = VideoCommon::Name(*this); - glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, - static_cast<GLsizei>(name.size()), name.data()); + glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, + texture.handle, static_cast<GLsizei>(name.size()), name.data()); } } +Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} + Image::~Image() = default; void Image::UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(true); + } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); @@ -693,12 +724,18 @@ void Image::UploadMemory(const ImageBufferMap& map, } CopyBufferToImage(copy, map.offset); } + if (is_rescaled) { + ScaleUp(); + } } void Image::DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(); + } glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -716,6 +753,9 @@ void Image::DownloadMemory(ImageBufferMap& map, } CopyImageToBuffer(copy, map.offset); } + if (is_rescaled) { + ScaleUp(true); + } } GLuint Image::StorageHandle() noexcept { @@ -741,11 +781,11 @@ GLuint Image::StorageHandle() noexcept { return store_view.handle; } store_view.Create(); - glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0, + glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, info.resources.levels, 0, info.resources.layers); return store_view.handle; default: - return texture.handle; + return current_texture; } } @@ -849,6 +889,140 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } +void Image::Scale(bool up_scale) { + const auto format_type = GetFormatType(info.format); + const GLenum attachment = [format_type] { + switch (format_type) { + case SurfaceType::ColorTexture: + return GL_COLOR_ATTACHMENT0; + case SurfaceType::Depth: + return GL_DEPTH_ATTACHMENT; + case SurfaceType::DepthStencil: + return GL_DEPTH_STENCIL_ATTACHMENT; + default: + UNREACHABLE(); + return GL_COLOR_ATTACHMENT0; + } + }(); + const GLenum mask = [format_type] { + switch (format_type) { + case SurfaceType::ColorTexture: + return GL_COLOR_BUFFER_BIT; + case SurfaceType::Depth: + return GL_DEPTH_BUFFER_BIT; + case SurfaceType::DepthStencil: + return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT; + default: + UNREACHABLE(); + return GL_COLOR_BUFFER_BIT; + } + }(); + const size_t fbo_index = [format_type] { + switch (format_type) { + case SurfaceType::ColorTexture: + return 0; + case SurfaceType::Depth: + return 1; + case SurfaceType::DepthStencil: + return 2; + default: + UNREACHABLE(); + return 0; + } + }(); + const bool is_2d = info.type == ImageType::e2D; + const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0}; + // Integer formats must use NEAREST filter + const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)}; + const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST; + + const auto& resolution = runtime->resolution; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; + const u32 original_width = info.size.width; + const u32 original_height = info.size.height; + + if (!upscaled_backup.handle) { + auto dst_info = info; + dst_info.size.width = scaled_width; + dst_info.size.height = scaled_height; + upscaled_backup = MakeImage(dst_info, gl_internal_format); + } + const u32 src_width = up_scale ? original_width : scaled_width; + const u32 src_height = up_scale ? original_height : scaled_height; + const u32 dst_width = up_scale ? scaled_width : original_width; + const u32 dst_height = up_scale ? scaled_height : original_height; + const auto src_handle = up_scale ? texture.handle : upscaled_backup.handle; + const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle; + + // TODO (ameerj): Investigate other GL states that affect blitting. + glDisablei(GL_SCISSOR_TEST, 0); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(dst_width), + static_cast<GLfloat>(dst_height)); + + const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; + const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + for (s32 level = 0; level < info.resources.levels; ++level) { + const u32 src_level_width = std::max(1u, src_width >> level); + const u32 src_level_height = std::max(1u, src_height >> level); + const u32 dst_level_width = std::max(1u, dst_width >> level); + const u32 dst_level_height = std::max(1u, dst_height >> level); + + glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer); + + glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, + 0, dst_level_width, dst_level_height, mask, filter); + } + } + current_texture = dst_handle; + auto& state_tracker = runtime->GetStateTracker(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); +} + +bool Image::ScaleUp(bool ignore) { + if (True(flags & ImageFlagBits::Rescaled)) { + return false; + } + if (gl_format == 0 && gl_type == 0) { + // compressed textures + return false; + } + if (info.type == ImageType::Linear) { + UNREACHABLE(); + return false; + } + flags |= ImageFlagBits::Rescaled; + if (!runtime->resolution.active) { + return false; + } + has_scaled = true; + if (ignore) { + current_texture = upscaled_backup.handle; + return true; + } + Scale(true); + return true; +} + +bool Image::ScaleDown(bool ignore) { + if (False(flags & ImageFlagBits::Rescaled)) { + return false; + } + flags &= ~ImageFlagBits::Rescaled; + if (!runtime->resolution.active) { + return false; + } + if (ignore) { + current_texture = texture.handle; + return true; + } + Scale(false); + return true; +} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { @@ -862,7 +1036,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI flat_range = info.range; set_object_label = device.HasDebuggingToolAttached(); is_render_target = info.IsRenderTarget(); - original_texture = image.texture.handle; + original_texture = image.Handle(); num_samples = image.info.num_samples; if (!is_render_target) { swizzle[0] = info.x_source; @@ -950,9 +1124,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} -ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} +ImageView::~ImageView() = default; + GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { if (image_format == Shader::ImageFormat::Typeless) { return Handle(texture_type); @@ -1037,7 +1213,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); } else { LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); } @@ -1056,13 +1233,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of - // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared - // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with - // mismatching size, this is why core framebuffers are preferred. - GLuint handle; - glGenFramebuffers(1, &handle); - glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + framebuffer.Create(); + GLuint handle = framebuffer.handle; GLsizei num_buffers = 0; std::array<GLenum, NUM_RT> gl_draw_buffers; @@ -1110,31 +1282,31 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM const std::string name = VideoCommon::Name(key); glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data()); } - framebuffer.handle = handle; } +Framebuffer::~Framebuffer() = default; + void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image, std::span<const VideoCommon::ImageCopy> copies) { static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; - const u32 requested_pbo_size = - std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes); - - if (bgr_pbo_size < requested_pbo_size) { - bgr_pbo.Create(); - bgr_pbo_size = requested_pbo_size; - glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); - } + const u32 img_bpp = BytesPerBlock(src_image.info.format); for (const ImageCopy& copy : copies) { ASSERT(copy.src_offset == zero_offset); ASSERT(copy.dst_offset == zero_offset); - + const u32 num_src_layers = static_cast<u32>(copy.src_subresource.num_layers); + const u32 copy_size = copy.extent.width * copy.extent.height * num_src_layers * img_bpp; + if (bgr_pbo_size < copy_size) { + bgr_pbo.Create(); + bgr_pbo_size = copy_size; + glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); + } // Copy from source to PBO glPixelStorei(GL_PACK_ALIGNMENT, 1); glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.src_subresource.num_layers, src_image.GlFormat(), - src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr); + num_src_layers, src_image.GlFormat(), src_image.GlType(), + static_cast<GLsizei>(bgr_pbo_size), nullptr); // Copy from PBO to destination in desired GL format glPixelStorei(GL_UNPACK_ALIGNMENT, 1); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1ca2c90be..1bb762568 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -15,6 +15,10 @@ #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" +namespace Settings { +struct ResolutionScalingInfo; +} + namespace OpenGL { class Device; @@ -78,9 +82,11 @@ public: ImageBufferMap DownloadStagingBuffer(size_t size); + u64 GetDeviceLocalMemory() const; + void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { UNIMPLEMENTED(); } @@ -110,6 +116,12 @@ public: bool HasNativeASTC() const noexcept; + void TickFrame() {} + + StateTracker& GetStateTracker() { + return state_tracker; + } + private: struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); @@ -149,6 +161,10 @@ private: OGLTextureView null_image_view_cube; std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; + + std::array<OGLFramebuffer, 3> rescale_draw_fbos; + std::array<OGLFramebuffer, 3> rescale_read_fbos; + const Settings::ResolutionScalingInfo& resolution; }; class Image : public VideoCommon::ImageBase { @@ -157,6 +173,7 @@ class Image : public VideoCommon::ImageBase { public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit Image(const VideoCommon::NullImageParams&); ~Image(); @@ -174,7 +191,7 @@ public: GLuint StorageHandle() noexcept; GLuint Handle() const noexcept { - return texture.handle; + return current_texture; } GLuint GlFormat() const noexcept { @@ -185,16 +202,25 @@ public: return gl_type; } + bool ScaleUp(bool ignore = false); + + bool ScaleDown(bool ignore = false); + private: void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + void Scale(bool up_scale); + OGLTexture texture; + OGLTexture upscaled_backup; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; + TextureCacheRuntime* runtime{}; + GLuint current_texture{}; }; class ImageView : public VideoCommon::ImageViewBase { @@ -206,7 +232,15 @@ public: const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); - explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); + + ~ImageView(); + + ImageView(const ImageView&) = delete; + ImageView& operator=(const ImageView&) = delete; + + ImageView(ImageView&&) = default; + ImageView& operator=(ImageView&&) = default; [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); @@ -276,6 +310,14 @@ public: explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&&) = default; + Framebuffer& operator=(Framebuffer&&) = default; + [[nodiscard]] GLuint Handle() const noexcept { return framebuffer.handle; } @@ -293,7 +335,7 @@ struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; - static constexpr bool HAS_DEVICE_MEMORY_INFO = false; + static constexpr bool HAS_DEVICE_MEMORY_INFO = true; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7d7cba69c..28daacd82 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,8 +21,13 @@ #include "core/memory.h" #include "core/perf_stats.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/fxaa_frag.h" +#include "video_core/host_shaders/fxaa_vert.h" #include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" +#include "video_core/host_shaders/present_bicubic_frag.h" +#include "video_core/host_shaders/present_gaussian_frag.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -208,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf framebuffer_crop_rect = framebuffer.crop_rect; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { + screen_info.was_accelerated = + rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); + if (screen_info.was_accelerated) { return; } @@ -251,12 +258,25 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs + fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); + fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); + present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); + present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); + present_gaussian_fragment = + CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); + present_scaleforce_fragment = + CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), + GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + present_sampler_nn.Create(); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); // Generate VBO handle for drawing vertex_buffer.Create(); @@ -274,6 +294,8 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + + fxaa_framebuffer.Create(); } void RendererOpenGL::AddTelemetryFields() { @@ -325,18 +347,130 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, texture.resource.Release(); texture.resource.Create(GL_TEXTURE_2D); glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); + fxaa_texture.Release(); + fxaa_texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F, + Settings::values.resolution_info.ScaleUp(screen_info.texture.width), + Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); + glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle, + 0); } void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { + // TODO: Signal state tracker about these changes + state_tracker.NotifyScreenDrawVertexArray(); + state_tracker.NotifyPolygonModes(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyColorMask(0); + state_tracker.NotifyBlend0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyFrontFace(); + state_tracker.NotifyCullTest(); + state_tracker.NotifyDepthTest(); + state_tracker.NotifyStencilTest(); + state_tracker.NotifyPolygonOffset(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); + state_tracker.NotifyLogicOp(); + state_tracker.NotifyClipControl(); + state_tracker.NotifyAlphaTest(); + + state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); + // Update background color before drawing glClearColor(Settings::values.bg_red.GetValue() / 255.0f, Settings::values.bg_green.GetValue() / 255.0f, Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glBindTextureUnit(0, screen_info.display_texture); + + if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) { + program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); + + glEnablei(GL_SCISSOR_TEST, 0); + auto viewport_width = screen_info.texture.width; + auto scissor_width = framebuffer_crop_rect.GetWidth(); + if (scissor_width <= 0) { + scissor_width = viewport_width; + } + auto viewport_height = screen_info.texture.height; + auto scissor_height = framebuffer_crop_rect.GetHeight(); + if (scissor_height <= 0) { + scissor_height = viewport_height; + } + if (screen_info.was_accelerated) { + viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); + scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); + viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); + scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); + } + glScissorIndexed(0, 0, 0, scissor_width, scissor_height); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width), + static_cast<GLfloat>(viewport_height)); + glDepthRangeIndexed(0, 0.0, 0.0); + + glBindSampler(0, present_sampler.handle); + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle); + + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + + glBindTextureUnit(0, fxaa_texture.handle); + } + // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + + GLuint fragment_handle; + const auto filter = Settings::values.scaling_filter.GetValue(); + switch (filter) { + case Settings::ScalingFilter::NearestNeighbor: + fragment_handle = present_bilinear_fragment.handle; + break; + case Settings::ScalingFilter::Bilinear: + fragment_handle = present_bilinear_fragment.handle; + break; + case Settings::ScalingFilter::Bicubic: + fragment_handle = present_bicubic_fragment.handle; + break; + case Settings::ScalingFilter::Gaussian: + fragment_handle = present_gaussian_fragment.handle; + break; + case Settings::ScalingFilter::ScaleForce: + fragment_handle = present_scaleforce_fragment.handle; + break; + case Settings::ScalingFilter::Fsr: + LOG_WARNING( + Render_OpenGL, + "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce"); + fragment_handle = present_scaleforce_fragment.handle; + break; + default: + fragment_handle = present_bilinear_fragment.handle; + break; + } + program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); @@ -370,6 +504,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / static_cast<f32>(screen_info.texture.height); } + if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && + !screen_info.was_accelerated) { + scale_u /= Settings::values.resolution_info.up_factor; + scale_v /= Settings::values.resolution_info.up_factor; + } const auto& screen = layout.screen; const std::array vertices = { @@ -380,47 +519,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { }; glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - // TODO: Signal state tracker about these changes - state_tracker.NotifyScreenDrawVertexArray(); - state_tracker.NotifyPolygonModes(); - state_tracker.NotifyViewport0(); - state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask(0); - state_tracker.NotifyBlend0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyFrontFace(); - state_tracker.NotifyCullTest(); - state_tracker.NotifyDepthTest(); - state_tracker.NotifyStencilTest(); - state_tracker.NotifyPolygonOffset(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); - state_tracker.NotifyLogicOp(); - state_tracker.NotifyClipControl(); - state_tracker.NotifyAlphaTest(); - - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { glEnable(GL_FRAMEBUFFER_SRGB); } else { glDisable(GL_FRAMEBUFFER_SRGB); } - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_DEPTH_TEST); - glDisable(GL_STENCIL_TEST); - glDisable(GL_POLYGON_OFFSET_FILL); - glDisable(GL_RASTERIZER_DISCARD); - glDisable(GL_ALPHA_TEST); - glDisablei(GL_BLEND, 0); glDisablei(GL_SCISSOR_TEST, 0); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glCullFace(GL_BACK); - glFrontFace(GL_CW); - glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), static_cast<GLfloat>(layout.height)); - glDepthRangeIndexed(0, 0.0, 0.0); glEnableVertexAttribArray(PositionLocation); glEnableVertexAttribArray(TexCoordLocation); @@ -440,8 +546,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); } - glBindTextureUnit(0, screen_info.display_texture); - glBindSampler(0, present_sampler.handle); + if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { + glBindSampler(0, present_sampler.handle); + } else { + glBindSampler(0, present_sampler_nn.handle); + } glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index d455f572f..cda333cad 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -50,6 +50,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture{}; + bool was_accelerated = false; bool display_srgb{}; const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; @@ -109,9 +110,15 @@ private: // OpenGL object IDs OGLSampler present_sampler; + OGLSampler present_sampler_nn; OGLBuffer vertex_buffer; + OGLProgram fxaa_vertex; + OGLProgram fxaa_fragment; OGLProgram present_vertex; - OGLProgram present_fragment; + OGLProgram present_bilinear_fragment; + OGLProgram present_bicubic_fragment; + OGLProgram present_gaussian_fragment; + OGLProgram present_scaleforce_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer @@ -119,6 +126,8 @@ private: /// Display information for Switch screen ScreenInfo screen_info; + OGLTexture fxaa_texture; + OGLFramebuffer fxaa_framebuffer; /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; |