diff options
Diffstat (limited to '')
17 files changed, 117 insertions, 29 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e5a78a914..feca5105f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -74,6 +74,11 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::ClipDistance7: { const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; const u32 index{static_cast<u32>(attr) - base}; + if (index >= ctx.profile.max_user_clip_distances) { + LOG_WARNING(Shader, "Ignoring clip distance store {} >= {} supported", index, + ctx.profile.max_user_clip_distances); + return std::nullopt; + } const Id clip_num{ctx.Const(index)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 3350f1f85..2abc21a17 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1528,7 +1528,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } - const Id type{TypeArray(F32[1], Const(8U))}; + const Id type{TypeArray( + F32[1], Const(std::min(info.used_clip_distances, profile.max_user_clip_distances)))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } if (info.stores[IR::Attribute::Layer] && diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 70292686f..cb82a326c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -913,7 +913,11 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } for (size_t index = 0; index < 8; ++index) { const u16 mask{header.vtg.omap_systemc.clip_distances}; - info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + const bool used{((mask >> index) & 1) != 0}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, used); + if (used) { + info.used_clip_distances = static_cast<u32>(index) + 1; + } } info.stores.Set(IR::Attribute::PrimitiveId, header.vtg.omap_systemb.primitive_array_id != 0); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 66901a965..7578d41cc 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -87,6 +87,8 @@ struct Profile { bool has_broken_robust{}; u64 min_ssbo_alignment{}; + + u32 max_user_clip_distances{}; }; } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index b4b4afd37..1419b8fe7 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -227,6 +227,8 @@ struct Info { bool requires_layer_emulation{}; IR::Attribute emulated_layer{}; + u32 used_clip_distances{}; + boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> constant_buffer_descriptors; boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index a71866b75..b787b6994 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -58,6 +58,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); } glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); + if (runtime.has_unified_vertex_buffers) { + glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); + } } void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { @@ -109,6 +112,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, : device{device_}, staging_buffer_pool{staging_buffer_pool_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, + has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { GLint gl_max_attributes; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); @@ -210,8 +214,14 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz } void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); - index_buffer_offset = offset; + if (has_unified_vertex_buffers) { + buffer.MakeResident(GL_READ_ONLY); + glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, + static_cast<GLsizeiptr>(Common::AlignUp(size, 4))); + } else { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); + index_buffer_offset = offset; + } } void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, @@ -219,8 +229,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, if (index >= max_attributes) { return; } - glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), - static_cast<GLsizei>(stride)); + if (has_unified_vertex_buffers) { + buffer.MakeResident(GL_READ_ONLY); + glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, + buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); + } else { + glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), + static_cast<GLsizei>(stride)); + } } void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { @@ -233,9 +250,23 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi [](u64 stride) { return static_cast<GLsizei>(stride); }); const u32 count = std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index); - glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(), - reinterpret_cast<const GLintptr*>(bindings.offsets.data()), - buffer_strides.data()); + if (has_unified_vertex_buffers) { + for (u32 index = 0; index < count; ++index) { + Buffer& buffer = *bindings.buffers[index]; + buffer.MakeResident(GL_READ_ONLY); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, bindings.min_index + index, + buffer.HostGpuAddr() + bindings.offsets[index], + static_cast<GLsizeiptr>(bindings.sizes[index])); + } + static constexpr std::array<size_t, 32> ZEROS{}; + glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), + reinterpret_cast<const GLuint*>(ZEROS.data()), + reinterpret_cast<const GLintptr*>(ZEROS.data()), buffer_strides.data()); + } else { + glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(), + reinterpret_cast<const GLintptr*>(bindings.offsets.data()), + buffer_strides.data()); + } } void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 71cd45d35..1e8708f59 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -209,6 +209,7 @@ private: bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; + bool has_unified_vertex_buffers = false; bool use_storage_buffers = false; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a6c93068f..993438a27 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -200,6 +200,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_derivative_control = GLAD_GL_ARB_derivative_control; + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 96034ea4a..a5a6bbbba 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -72,6 +72,10 @@ public: return has_texture_shadow_lod; } + bool HasVertexBufferUnifiedMemory() const { + return has_vertex_buffer_unified_memory; + } + bool HasASTC() const { return has_astc; } @@ -211,6 +215,7 @@ private: bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; bool has_texture_shadow_lod{}; + bool has_vertex_buffer_unified_memory{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 279e5a4e0..4832c03c5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -162,14 +162,18 @@ void RasterizerOpenGL::Clear(u32 layer_count) { SyncFramebufferSRGB(); } if (regs.clear_surface.Z) { - ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); + if (regs.zeta_enable != 0) { + LOG_DEBUG(Render_OpenGL, "Tried to clear Z but buffer is not enabled!"); + } use_depth = true; state_tracker.NotifyDepthMask(); glDepthMask(GL_TRUE); } if (regs.clear_surface.S) { - ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!"); + if (regs.zeta_enable) { + LOG_DEBUG(Render_OpenGL, "Tried to clear stencil but buffer is not enabled!"); + } use_stencil = true; } @@ -1294,15 +1298,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum program->ConfigureTransformFeedback(); UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry)); - UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); + regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); // We may have to call BeginTransformFeedbackNV here since they seem to call different // implementations on Nvidia's driver (the pointer is different) but we are using // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. - glBeginTransformFeedback(GL_POINTS); + glBeginTransformFeedback(primitive_mode); } void RasterizerOpenGL::EndTransformFeedback() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 26f2d0ea7..b5999362a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -233,6 +233,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .ignore_nan_fp_comparisons = true, .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(), + .max_user_clip_distances = 8, }, host_info{ .support_float64 = true, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7a4f0c5c1..2933718b6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -168,6 +168,14 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -667,7 +675,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { offsetof(ScreenRectVertex, tex_coord)); glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(TexCoordLocation, 0); - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { glBindSampler(0, present_sampler.handle); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5958f52f7..2267069e7 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -563,22 +563,27 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi } buffer_handles.push_back(handle); } + const u32 device_max = device.GetMaxVertexInputBindings(); + const u32 min_binding = std::min(bindings.min_index, device_max); + const u32 max_binding = std::min(bindings.max_index, device_max); + const u32 binding_count = max_binding - min_binding; + if (binding_count == 0) { + return; + } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([this, bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, - std::min(bindings_.max_index - bindings_.min_index, - device.GetMaxVertexInputBindings()), - buffer_handles_.data(), bindings_.offsets.data(), - bindings_.sizes.data(), bindings_.strides.data()); + scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles), + binding_count](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, binding_count, buffer_handles_.data(), + bindings_.offsets.data(), bindings_.sizes.data(), + bindings_.strides.data()); }); } else { - scheduler.Record([this, bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(bindings_.min_index, - std::min(bindings_.max_index - bindings_.min_index, - device.GetMaxVertexInputBindings()), - buffer_handles_.data(), bindings_.offsets.data()); + scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles), + binding_count](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(), + bindings_.offsets.data()); }); } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2a13b2a72..fa63d6228 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -374,6 +374,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_robust = device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, .min_ssbo_alignment = device.GetStorageBufferAlignment(), + .max_user_clip_distances = device.GetMaxUserClipDistances(), }; host_info = Shader::HostTranslateInfo{ diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 1fda0042d..a6fbca69e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -695,6 +695,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR std::min(properties.properties.limits.maxVertexInputBindings, 16U); } + if (is_turnip) { + LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits"); + properties.properties.limits.maxVertexInputBindings = 32; + } + if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) { LOG_INFO(Render_Vulkan, "Removing extendedDynamicState2 due to missing extendedDynamicState"); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4f3846345..701817086 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -665,6 +665,10 @@ public: return properties.properties.limits.maxViewports; } + u32 GetMaxUserClipDistances() const { + return properties.properties.limits.maxClipDistances; + } + bool SupportsConditionalBarriers() const { return supports_conditional_barriers; } diff --git a/src/yuzu/configuration/qt_config.cpp b/src/yuzu/configuration/qt_config.cpp index 417a43ec5..a71000b72 100644 --- a/src/yuzu/configuration/qt_config.cpp +++ b/src/yuzu/configuration/qt_config.cpp @@ -225,6 +225,8 @@ void QtConfig::ReadPathValues() { QString::fromStdString(ReadStringSetting(std::string("recentFiles"))) .split(QStringLiteral(", "), Qt::SkipEmptyParts, Qt::CaseSensitive); + ReadCategory(Settings::Category::Paths); + EndGroup(); } @@ -405,6 +407,8 @@ void QtConfig::SaveQtControlValues() { void QtConfig::SavePathValues() { BeginGroup(Settings::TranslateCategory(Settings::Category::Paths)); + WriteCategory(Settings::Category::Paths); + WriteSetting(std::string("romsPath"), UISettings::values.roms_path); BeginArray(std::string("gamedirs")); for (int i = 0; i < UISettings::values.game_dirs.size(); ++i) { |