diff options
Diffstat (limited to 'src')
46 files changed, 1292 insertions, 1289 deletions
diff --git a/src/common/input.h b/src/common/input.h index fc14fd7bf..d27b1d772 100644 --- a/src/common/input.h +++ b/src/common/input.h @@ -292,9 +292,6 @@ class InputDevice { public: virtual ~InputDevice() = default; - // Request input device to update if necessary - virtual void SoftUpdate() {} - // Force input device to update data regardless of the current state virtual void ForceUpdate() {} diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 2eaded242..1638b79f5 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) { // Renderer values.fsr_sharpening_slider.SetGlobal(true); values.renderer_backend.SetGlobal(true); + values.renderer_force_max_clock.SetGlobal(true); values.vulkan_device.SetGlobal(true); values.aspect_ratio.SetGlobal(true); values.max_anisotropy.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index d9e82087d..a457e3f23 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -415,6 +415,7 @@ struct Values { // Renderer SwitchableSetting<RendererBackend, true> renderer_backend{ RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; + SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"}; Setting<bool> renderer_debug{false, "debug"}; Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 947747d36..2a7570073 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* config.enable_cycle_counting = true; // Code cache size +#ifdef ARCHITECTURE_arm64 + config.code_cache_size = 128_MiB; +#else config.code_cache_size = 512_MiB; +#endif // Allow memory 
fault handling to work if (system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 3df943df7..7229fdc2a 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.enable_cycle_counting = true; // Code cache size +#ifdef ARCHITECTURE_arm64 + config.code_cache_size = 128_MiB; +#else config.code_cache_size = 512_MiB; +#endif // Allow memory fault handling to work if (system.DebuggerEnabled()) { diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index 71364c323..7a01f3f4c 100644 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -1434,16 +1434,6 @@ AnalogSticks EmulatedController::GetSticks() const { return {}; } - // Some drivers like stick from buttons need constant refreshing - for (auto& device : stick_devices) { - if (!device) { - continue; - } - lock.unlock(); - device->SoftUpdate(); - lock.lock(); - } - return controller.analog_stick_state; } diff --git a/src/core/internal_network/network.cpp b/src/core/internal_network/network.cpp index 447fbffaa..282ea1ff9 100644 --- a/src/core/internal_network/network.cpp +++ b/src/core/internal_network/network.cpp @@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) { return Errno::NETUNREACH; case WSAEMSGSIZE: return Errno::MSGSIZE; + case WSAETIMEDOUT: + return Errno::TIMEDOUT; default: UNIMPLEMENTED_MSG("Unimplemented errno={}", e); return Errno::OTHER; @@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) { return Errno::NETUNREACH; case EMSGSIZE: return Errno::MSGSIZE; + case ETIMEDOUT: + return Errno::TIMEDOUT; default: UNIMPLEMENTED_MSG("Unimplemented errno={}", e); return Errno::OTHER; @@ -226,7 +230,7 @@ Errno GetAndLogLastError() { int e = errno; #endif const Errno err = TranslateNativeError(e); - if (err == 
Errno::AGAIN) { + if (err == Errno::AGAIN || err == Errno::TIMEDOUT) { return err; } LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e)); diff --git a/src/input_common/helpers/stick_from_buttons.cpp b/src/input_common/helpers/stick_from_buttons.cpp index 82aa6ac2f..f3a0b3419 100644 --- a/src/input_common/helpers/stick_from_buttons.cpp +++ b/src/input_common/helpers/stick_from_buttons.cpp @@ -13,11 +13,11 @@ class Stick final : public Common::Input::InputDevice { public: using Button = std::unique_ptr<Common::Input::InputDevice>; - Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, + Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, Button updater_, float modifier_scale_, float modifier_angle_) : up(std::move(up_)), down(std::move(down_)), left(std::move(left_)), - right(std::move(right_)), modifier(std::move(modifier_)), modifier_scale(modifier_scale_), - modifier_angle(modifier_angle_) { + right(std::move(right_)), modifier(std::move(modifier_)), updater(std::move(updater_)), + modifier_scale(modifier_scale_), modifier_angle(modifier_angle_) { up->SetCallback({ .on_change = [this](const Common::Input::CallbackStatus& callback_) { @@ -48,6 +48,9 @@ public: UpdateModButtonStatus(callback_); }, }); + updater->SetCallback({ + .on_change = [this](const Common::Input::CallbackStatus& callback_) { SoftUpdate(); }, + }); last_x_axis_value = 0.0f; last_y_axis_value = 0.0f; } @@ -248,7 +251,7 @@ public: modifier->ForceUpdate(); } - void SoftUpdate() override { + void SoftUpdate() { Common::Input::CallbackStatus status{ .type = Common::Input::InputType::Stick, .stick_status = GetStatus(), @@ -308,6 +311,7 @@ private: Button left; Button right; Button modifier; + Button updater; float modifier_scale{}; float modifier_angle{}; float angle{}; @@ -331,11 +335,12 @@ std::unique_ptr<Common::Input::InputDevice> StickFromButton::Create( auto left = 
Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine)); auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine)); auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine)); + auto updater = Common::Input::CreateInputDeviceFromString("engine:updater,button:0"); auto modifier_scale = params.Get("modifier_scale", 0.5f); auto modifier_angle = params.Get("modifier_angle", 5.5f); return std::make_unique<Stick>(std::move(up), std::move(down), std::move(left), - std::move(right), std::move(modifier), modifier_scale, - modifier_angle); + std::move(right), std::move(modifier), std::move(updater), + modifier_scale, modifier_angle); } } // namespace InputCommon diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 4dc92f482..e0b2131ed 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -28,6 +28,28 @@ namespace InputCommon { +/// Dummy engine to get periodic updates +class UpdateEngine final : public InputEngine { +public: + explicit UpdateEngine(std::string input_engine_) : InputEngine(std::move(input_engine_)) { + PreSetController(identifier); + } + + void PumpEvents() { + SetButton(identifier, 0, last_state); + last_state = !last_state; + } + +private: + static constexpr PadIdentifier identifier = { + .guid = Common::UUID{}, + .port = 0, + .pad = 0, + }; + + bool last_state{}; +}; + struct InputSubsystem::Impl { template <typename Engine> void RegisterEngine(std::string name, std::shared_ptr<Engine>& engine) { @@ -45,6 +67,7 @@ struct InputSubsystem::Impl { void Initialize() { mapping_factory = std::make_shared<MappingFactory>(); + RegisterEngine("updater", update_engine); RegisterEngine("keyboard", keyboard); RegisterEngine("mouse", mouse); RegisterEngine("touch", touch_screen); @@ -74,6 +97,7 @@ struct InputSubsystem::Impl { } void Shutdown() { + UnregisterEngine(update_engine); UnregisterEngine(keyboard); UnregisterEngine(mouse); 
UnregisterEngine(touch_screen); @@ -252,6 +276,7 @@ struct InputSubsystem::Impl { } void PumpEvents() const { + update_engine->PumpEvents(); #ifdef HAVE_SDL2 sdl->PumpEvents(); #endif @@ -263,6 +288,7 @@ struct InputSubsystem::Impl { std::shared_ptr<MappingFactory> mapping_factory; + std::shared_ptr<UpdateEngine> update_engine; std::shared_ptr<Keyboard> keyboard; std::shared_ptr<Mouse> mouse; std::shared_ptr<TouchScreen> touch_screen; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index db9c94ce8..0cd87a48f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, - ctx.Const(element))); + return ctx.OpLoad( + ctx.F32[1], + ctx.need_input_position_indirect + ? 
AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value, + ctx.Const(element)) + : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 2c90f2368..c5db19d09 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); } -Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { - const Id thirty_two{ctx.Const(32u)}; - const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; - const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; - return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); +Id AddPartitionBase(EmitContext& ctx, Id thread_id) { + const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))}; + const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))}; + return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base); } } // Anonymous namespace @@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - const Id thirty_two{ctx.Const(32u)}; - const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; - const Id upper_index{ctx.OpIAdd(ctx.U32[1], 
thirty_two, index)}; - const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; - index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); - clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; - const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, 
thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index ecb2db494..a0c155fdb 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { U16 = Name(TypeInt(16, false), "u16"); S16 = Name(TypeInt(16, true), "s16"); } - if (info.uses_int64) { + if (info.uses_int64 && profile.support_int64) { AddCapability(spv::Capability::Int64); U64 = Name(TypeInt(64, false), "u64"); } @@ 
-721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { size_t label_index{0}; if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); - const Id pointer{is_array - ? OpAccessChain(input_f32, input_position, vertex, masked_index) - : OpAccessChain(input_f32, input_position, masked_index)}; + const Id pointer{[&]() { + if (need_input_position_indirect) { + if (is_array) + return OpAccessChain(input_f32, input_position, vertex, u32_zero_value, + masked_index); + else + return OpAccessChain(input_f32, input_position, u32_zero_value, + masked_index); + } else { + if (is_array) + return OpAccessChain(input_f32, input_position, vertex, masked_index); + else + return OpAccessChain(input_f32, input_position, masked_index); + } + }()}; const Id result{OpLoad(F32[1], pointer)}; OpReturnValue(result); ++label_index; @@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) { Decorate(layer, spv::Decoration::Flat); } if (loads.AnyComponent(IR::Attribute::PositionX)) { - const bool is_fragment{stage != Stage::Fragment}; - const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = DefineInput(*this, F32[4], true, built_in); - if (profile.support_geometry_shader_passthrough) { - if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { - Decorate(input_position, spv::Decoration::PassthroughNV); + const bool is_fragment{stage == Stage::Fragment}; + if (!is_fragment && profile.has_broken_spirv_position_input) { + need_input_position_indirect = true; + + const Id input_position_struct = TypeStruct(F32[4]); + input_position = DefineInput(*this, input_position_struct, true); + + MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn, + static_cast<unsigned>(spv::BuiltIn::Position)); + Decorate(input_position_struct, spv::Decoration::Block); + } else { + const spv::BuiltIn built_in{is_fragment ? 
spv::BuiltIn::FragCoord + : spv::BuiltIn::Position}; + input_position = DefineInput(*this, F32[4], true, built_in); + + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } } } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 4414a5169..dbc5c55b9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -280,6 +280,7 @@ public: Id write_global_func_u32x2{}; Id write_global_func_u32x4{}; + bool need_input_position_indirect{}; Id input_position{}; std::array<Id, 32> input_generics{}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index ac159d24b..a42453e90 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings( } return mapping; } + +void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program, + const Shader::VaryingState& passthrough_mask, + bool passthrough_position, + std::optional<IR::Attribute> passthrough_layer_attr) { + for (u32 i = 0; i < program.output_vertices; i++) { + // Assign generics from input + for (u32 j = 0; j < 32; j++) { + if (!passthrough_mask.Generic(j)) { + continue; + } + + const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); + ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); + } + + if 
(passthrough_position) { + // Assign position from input + const IR::Attribute attr = IR::Attribute::PositionX; + ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); + } + + if (passthrough_layer_attr) { + // Assign layer + ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr), + ir.Imm32(0)); + } + + // Emit vertex + ir.EmitVertex(ir.Imm32(0)); + } + ir.EndPrimitive(ir.Imm32(0)); +} + +u32 GetOutputTopologyVertices(OutputTopology output_topology) { + switch (output_topology) { + case OutputTopology::PointList: + return 1; + case OutputTopology::LineStrip: + return 2; + default: + return 3; + } +} + +void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + EmitGeometryPassthrough( + ir, program, program.info.passthrough, + program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {}); + } + } + } +} + } // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, @@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; if (program.is_geometry_passthrough) { const auto& mask{env.GpPassthroughMask()}; - for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + for (size_t i = 0; i < mask.size() * 32; ++i) { program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; } + + if 
(!host_info.support_geometry_shader_passthrough) { + program.output_vertices = GetOutputTopologyVertices(program.output_topology); + LowerGeometryPassthrough(program, host_info); + } } break; } @@ -223,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo Optimization::PositionPass(env, program); - Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::GlobalMemoryToStorageBufferPass(program, host_info); Optimization::TexturePass(env, program, host_info); if (Settings::values.resolution_info.active) { @@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, IR::Program program; program.stage = Stage::Geometry; program.output_topology = output_topology; - switch (output_topology) { - case OutputTopology::PointList: - program.output_vertices = 1; - break; - case OutputTopology::LineStrip: - program.output_vertices = 2; - break; - default: - program.output_vertices = 3; - break; - } + program.output_vertices = GetOutputTopologyVertices(output_topology); program.is_geometry_passthrough = false; program.info.loads.mask = source_program.info.stores.mask; @@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, node.data.block = current_block; IR::IREmitter ir{*current_block}; - for (u32 i = 0; i < program.output_vertices; i++) { - // Assign generics from input - for (u32 j = 0; j < 32; j++) { - if (!program.info.stores.Generic(j)) { - continue; - } - - const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); - ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); - } - - // Assign position from input - const IR::Attribute attr = IR::Attribute::PositionX; - 
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); - - // Assign layer - ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer), - ir.Imm32(0)); - - // Emit vertex - ir.EmitVertex(ir.Imm32(0)); - } - ir.EndPrimitive(ir.Imm32(0)); + EmitGeometryPassthrough(ir, program, program.info.stores, true, + source_program.info.emulated_layer); IR::Block* return_block{block_pool.Create(inst_pool)}; IR::IREmitter{*return_block}.Epilogue(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index d5d279554..55fc48768 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -15,6 +15,9 @@ struct HostTranslateInfo { bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS + u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs + bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry + ///< passthrough shaders }; } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 336338e62..9101722ba 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -11,6 +11,7 @@ #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include 
"shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { @@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { @@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. - const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + + // Align the offset base to match the host alignment requirements + low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); return ir.ISub(offset, low_cbuf); } @@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Program& program) { +void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { StorageInfo info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; - const IR::U32 
offset{StorageOffset(*block, *inst, storage_buffer)}; + const IR::U32 offset{ + StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 1f8f2ba95..4ffad1172 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -15,7 +15,7 @@ namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); -void GlobalMemoryToStorageBufferPass(IR::Program& program); +void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index b8841a536..253e0d0bd 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -55,6 +55,8 @@ struct Profile { /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; + /// The Position builtin needs to be wrapped in a struct when used as an input + bool has_broken_spirv_position_input{}; /// Offset image operands with an unsigned type do not work bool has_broken_unsigned_image_offsets{}; /// Signed instructions with unsigned data types are misinterpreted diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 44236b6b1..f93181e1e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -65,6 +65,8 @@ enum class Interpolation { struct ConstantBufferDescriptor { u32 index; u32 count; + + auto operator<=>(const ConstantBufferDescriptor&) const = default; }; struct StorageBufferDescriptor { @@ -72,6 +74,8 @@ 
struct StorageBufferDescriptor { u32 cbuf_offset; u32 count; bool is_written; + + auto operator<=>(const StorageBufferDescriptor&) const = default; }; struct TextureBufferDescriptor { @@ -84,6 +88,8 @@ struct TextureBufferDescriptor { u32 secondary_shift_left; u32 count; u32 size_shift; + + auto operator<=>(const TextureBufferDescriptor&) const = default; }; using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; @@ -95,6 +101,8 @@ struct ImageBufferDescriptor { u32 cbuf_offset; u32 count; u32 size_shift; + + auto operator<=>(const ImageBufferDescriptor&) const = default; }; using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; @@ -110,6 +118,8 @@ struct TextureDescriptor { u32 secondary_shift_left; u32 count; u32 size_shift; + + auto operator<=>(const TextureDescriptor&) const = default; }; using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; @@ -122,6 +132,8 @@ struct ImageDescriptor { u32 cbuf_offset; u32 count; u32 size_shift; + + auto operator<=>(const ImageDescriptor&) const = default; }; using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b7095ae13..f617665de 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -191,6 +191,8 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_texture_cache_base.cpp + renderer_vulkan/vk_turbo_mode.cpp + renderer_vulkan/vk_turbo_mode.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 06fd40851..627917ab6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1938,14 +1938,21 @@ typename BufferCache<P>::Binding 
BufferCache<P>::StorageBufferBinding(GPUVAddr s bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); - const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + const u32 alignment = runtime.GetStorageBufferAlignment(); + + const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); + const u32 aligned_size = + Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment); + + const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); if (!cpu_addr || size == 0) { return NULL_BINDING; } - const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); + + const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE); const Binding binding{ .cpu_addr = *cpu_addr, - .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), + .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr), .buffer_id = BufferId{}, }; return binding; diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e6dc24f22..f275b2aa9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -47,6 +47,7 @@ set(SHADER_FILES vulkan_present_scaleforce_fp16.frag vulkan_present_scaleforce_fp32.frag vulkan_quad_indexed.comp + vulkan_turbo_mode.comp vulkan_uint8.comp ) diff --git a/src/video_core/host_shaders/vulkan_turbo_mode.comp b/src/video_core/host_shaders/vulkan_turbo_mode.comp new file mode 100644 index 000000000..d651001d9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_turbo_mode.comp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0) buffer ThreadData { + uint 
data[]; +}; + +uint xorshift32(uint x) { + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return x; +} + +uint getGlobalIndex() { + return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * gl_WorkGroupSize.y * gl_NumWorkGroups.y; +} + +void main() { + uint myIndex = xorshift32(getGlobalIndex()); + uint otherIndex = xorshift32(myIndex); + + uint otherValue = atomicAdd(data[otherIndex % data.length()], 0) + 1; + atomicAdd(data[myIndex % data.length()], otherValue); +} diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a8c3f8b67..bb1962073 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -160,6 +160,10 @@ public: return device.CanReportMemoryUsage(); } + u32 GetStorageBufferAlignment() const { + return static_cast<u32>(device.GetShaderStorageBufferAlignment()); + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7d48af8e1..181857d9c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,6 +139,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load void RasterizerOpenGL::Clear(u32 layer_count) { MICROPROFILE_SCOPE(OpenGL_Clears); + gpu_memory->FlushCaching(); const auto& regs = maxwell3d->regs; bool use_color{}; bool use_depth{}; @@ -207,6 +208,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; @@ -319,6 +321,7 @@ void RasterizerOpenGL::DrawIndirect() { } void RasterizerOpenGL::DispatchCompute() { + 
gpu_memory->FlushCaching(); ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; if (!pipeline) { return; @@ -526,6 +529,7 @@ void RasterizerOpenGL::TickFrame() { } bool RasterizerOpenGL::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 03b6314ff..7dd854e0f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), + .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()), + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bc75680f0..de95f2634 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -442,7 +442,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindTextureUnit(0, screen_info.display_texture); - const auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + if (anti_aliasing > Settings::AntiAliasing::LastAA) { + LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing); + anti_aliasing = Settings::AntiAliasing::None; + Settings::values.anti_aliasing.SetValue(anti_aliasing); + } + if (anti_aliasing != Settings::AntiAliasing::None) { glEnablei(GL_SCISSOR_TEST, 0); auto 
viewport_width = screen_info.texture.width; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bf97d25a4..2a8d9e377 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -60,24 +60,13 @@ std::string GetDriverVersion(const Device& device) { return GetReadableVersion(version); } -std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) { - std::sort(std::begin(available_extensions), std::end(available_extensions)); - - static constexpr std::size_t AverageExtensionSize = 64; - std::string separated_extensions; - separated_extensions.reserve(available_extensions.size() * AverageExtensionSize); - - const auto end = std::end(available_extensions); - for (auto extension = std::begin(available_extensions); extension != end; ++extension) { - if (const bool is_last = extension + 1 == end; is_last) { - separated_extensions += *extension; - } else { - separated_extensions += fmt::format("{},", *extension); - } - } - return separated_extensions; +std::string BuildCommaSeparatedExtensions( + const std::set<std::string, std::less<>>& available_extensions) { + return fmt::format("{}", fmt::join(available_extensions, ",")); } +} // Anonymous namespace + Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, VkSurfaceKHR surface) { const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); @@ -89,7 +78,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl const vk::PhysicalDevice physical_device(devices[device_index], dld); return Device(*instance, physical_device, surface, dld); } -} // Anonymous namespace RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, @@ -109,6 +97,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, screen_info), 
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, state_tracker, scheduler) { + if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { + turbo_mode.emplace(instance, dld); + scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); + } Report(); } catch (const vk::Exception& exception) { LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); @@ -116,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, } RendererVulkan::~RendererVulkan() { + scheduler.RegisterOnSubmit([] {}); void(device.GetLogical().WaitIdle()); } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index e7bfecb20..009e75e0d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -13,6 +13,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_turbo_mode.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -31,6 +32,9 @@ class GPU; namespace Vulkan { +Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, + VkSurfaceKHR surface); + class RendererVulkan final : public VideoCore::RendererBase { public: explicit RendererVulkan(Core::TelemetrySession& telemtry_session, @@ -74,6 +78,7 @@ private: Swapchain swapchain; BlitScreen blit_screen; RasterizerVulkan rasterizer; + std::optional<TurboMode> turbo_mode; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index b0153a502..1cfb4c2ff 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp 
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -330,6 +330,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } +u32 BufferCacheRuntime::GetStorageBufferAlignment() const { + return static_cast<u32>(device.GetStorageBufferAlignment()); +} + void BufferCacheRuntime::Finish() { scheduler.Finish(); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 183b33632..06539c733 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -73,6 +73,8 @@ public: bool CanReportMemoryUsage() const; + u32 GetStorageBufferAlignment() const; + [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 67e5bc648..7e69b11d8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -331,6 +331,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .need_declared_frag_colors = false, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, + .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, .has_broken_unsigned_image_offsets = false, .has_broken_signed_operations = false, .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, @@ -343,6 +344,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, .support_snorm_render_buffer = true, .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), + .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()), + 
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), }; if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { @@ -790,7 +793,8 @@ vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem:: return create_pipeline_cache(0, nullptr); } - const size_t cache_size = static_cast<size_t>(end) - magic_number.size(); + static constexpr size_t header_size = magic_number.size() + sizeof(cache_version); + const size_t cache_size = static_cast<size_t>(end) - header_size; std::vector<char> cache_data(cache_size); file.read(cache_data.data(), cache_size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ed4a72166..b75b8eec6 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -270,6 +270,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); FlushWork(); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); @@ -628,6 +629,7 @@ void RasterizerVulkan::TickFrame() { } bool RasterizerVulkan::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // TODO(Blinkhawk): Reimplement Host conditional rendering. 
return false; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c2e53a5d5..e03685af1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = signal_semaphores.data(), }; + + if (on_submit) { + on_submit(); + } + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { case VK_SUCCESS: break; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3858c506c..bd4cb0f7e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include <condition_variable> #include <cstddef> +#include <functional> #include <memory> #include <thread> #include <utility> @@ -66,6 +67,11 @@ public: query_cache = &query_cache_; } + // Registers a callback to perform on queue submission. + void RegisterOnSubmit(std::function<void()>&& func) { + on_submit = std::move(func); + } + /// Send work to a separate thread. 
template <typename T> void Record(T&& command) { @@ -216,6 +222,7 @@ private: vk::CommandBuffer current_cmdbuf; std::unique_ptr<CommandChunk> chunk; + std::function<void()> on_submit; State state; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp new file mode 100644 index 000000000..c42594149 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -0,0 +1,222 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/literals.h" +#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_turbo_mode.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +using namespace Common::Literals; + +TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) + : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { + { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + } + m_thread = std::jthread([&](auto stop_token) { Run(stop_token); }); +} + +TurboMode::~TurboMode() = default; + +void TurboMode::QueueSubmitted() { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + m_submission_cv.notify_one(); +} + +void TurboMode::Run(std::stop_token stop_token) { + auto& dld = m_device.GetLogical(); + + // Allocate buffer. 2MiB should be sufficient. 
+ auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 2_MiB, + .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + + // Commit some device local memory for the buffer. + auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); + + // Create the descriptor pool to contain our descriptor. + constexpr VkDescriptorPoolSize pool_size{ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + }; + + auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = &pool_size, + }); + + // Create the descriptor set layout from the pool. + constexpr VkDescriptorSetLayoutBinding layout_binding{ + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; + + auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 1, + .pBindings = &layout_binding, + }); + + // Actually create the descriptor set. + auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *descriptor_pool, + .descriptorSetCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + }); + + // Create the shader. + auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV); + + // Create the pipeline layout. 
+ auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + + // Actually create the pipeline. + const VkPipelineShaderStageCreateInfo shader_stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; + + auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = shader_stage, + .layout = *pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); + + // Create a fence to wait on. + auto fence = dld.CreateFence(VkFenceCreateInfo{ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }); + + // Create a command pool to allocate a command buffer from. + auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = m_device.GetGraphicsFamily(), + }); + + // Create a single command buffer. + auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY); + auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()}; + + while (!stop_token.stop_requested()) { + // Reset the fence. + fence.Reset(); + + // Update descriptor set. 
+ const VkDescriptorBufferInfo buffer_info{ + .buffer = *buffer, + .offset = 0, + .range = VK_WHOLE_SIZE, + }; + + const VkWriteDescriptorSet buffer_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set[0], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pImageInfo = nullptr, + .pBufferInfo = &buffer_info, + .pTexelBufferView = nullptr, + }; + + dld.UpdateDescriptorSets(std::array{buffer_write}, {}); + + // Set up the command buffer. + cmdbuf.Begin(VkCommandBufferBeginInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); + + // Clear the buffer. + cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0); + + // Bind descriptor set. + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, {}); + + // Bind the pipeline. + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + + // Dispatch. + cmdbuf.Dispatch(64, 64, 1); + + // Finish. + cmdbuf.End(); + + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = 0, + .pSignalSemaphores = nullptr, + }; + + m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence); + + // Wait for completion. + fence.Wait(); + + // Wait for the next graphics queue submission if necessary. 
+ std::unique_lock lk{m_submission_lock}; + Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { + return (std::chrono::steady_clock::now() - m_submission_time) <= + std::chrono::milliseconds{100}; + }); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h new file mode 100644 index 000000000..99b5ac50b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <chrono> +#include <mutex> + +#include "common/polyfill_thread.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class TurboMode { +public: + explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld); + ~TurboMode(); + + void QueueSubmitted(); + +private: + void Run(std::stop_token stop_token); + + Device m_device; + MemoryAllocator m_allocator; + std::mutex m_submission_lock; + std::condition_variable_any m_submission_cv; + std::chrono::time_point<std::chrono::steady_clock> m_submission_time{}; + + std::jthread m_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8e77f5aa3..1458ec4c8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -74,30 +74,6 @@ enum class NvidiaArchitecture { VoltaOrOlder, }; -constexpr std::array REQUIRED_EXTENSIONS{ - VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, - VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, -#ifdef _WIN32 - VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, -#endif -#ifdef __unix__ - VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, -#endif -}; - -constexpr std::array 
REQUIRED_EXTENSIONS_BEFORE_1_2{ - VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, - VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, - VK_KHR_8BIT_STORAGE_EXTENSION_NAME, - VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, - VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, -}; - -constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_3{ - VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, -}; - template <typename T> void SetNext(void**& next, T& data) { *next = &data; @@ -286,24 +262,9 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica return format_properties; } -std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) { - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - std::vector<std::string> supported_extensions; - supported_extensions.reserve(extensions.size()); - for (const auto& extension : extensions) { - supported_extensions.emplace_back(extension.extensionName); - } - return supported_extensions; -} - -bool IsExtensionSupported(std::span<const std::string> supported_extensions, - std::string_view extension) { - return std::ranges::find(supported_extensions, extension) != supported_extensions.end(); -} - NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, - std::span<const std::string> exts) { - if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { + const std::set<std::string, std::less<>>& exts) { + if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; shading_rate_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; @@ -316,423 +277,39 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, return NvidiaArchitecture::AmpereOrNewer; } } - if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { + if 
(exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { return NvidiaArchitecture::Turing; } return NvidiaArchitecture::VoltaOrOlder; } + +std::vector<const char*> ExtensionListForVulkan( + const std::set<std::string, std::less<>>& extensions) { + std::vector<const char*> output; + for (const auto& extension : extensions) { + output.push_back(extension.c_str()); + } + return output; +} + } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) - : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - instance_version{properties.apiVersion}, supported_extensions{GetSupportedExtensions( - physical)}, + : instance{instance_}, dld{dld_}, physical{physical_}, format_properties(GetFormatProperties(physical)) { - CheckSuitability(surface != nullptr); + if (!GetSuitability(surface != nullptr)) { + throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); + } SetupFamilies(surface); - SetupFeatures(); - SetupProperties(); - const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(surface != nullptr); - - VkPhysicalDeviceFeatures2 features2{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - .pNext = nullptr, - .features{ - .robustBufferAccess = true, - .fullDrawIndexUint32 = false, - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, - .tessellationShader = true, - .sampleRateShading = true, - .dualSrcBlend = true, - .logicOp = true, - .multiDrawIndirect = true, - .drawIndirectFirstInstance = true, - .depthClamp = true, - .depthBiasClamp = true, - .fillModeNonSolid = true, - .depthBounds = is_depth_bounds_supported, - .wideLines = true, - .largePoints = true, - .alphaToOne = false, - .multiViewport = true, - .samplerAnisotropy = true, - .textureCompressionETC2 = false, - .textureCompressionASTC_LDR = is_optimal_astc_supported, - .textureCompressionBC = false, - 
.occlusionQueryPrecise = true, - .pipelineStatisticsQuery = false, - .vertexPipelineStoresAndAtomics = true, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = false, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = is_shader_storage_image_multisample, - .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, - .shaderStorageImageWriteWithoutFormat = true, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = true, - .shaderCullDistance = true, - .shaderFloat64 = is_shader_float64_supported, - .shaderInt64 = is_shader_int64_supported, - .shaderInt16 = is_shader_int16_supported, - .shaderResourceResidency = false, - .shaderResourceMinLod = false, - .sparseBinding = false, - .sparseResidencyBuffer = false, - .sparseResidencyImage2D = false, - .sparseResidencyImage3D = false, - .sparseResidency2Samples = false, - .sparseResidency4Samples = false, - .sparseResidency8Samples = false, - .sparseResidency16Samples = false, - .sparseResidencyAliased = false, - .variableMultisampleRate = false, - .inheritedQueries = false, - }, - }; - const void* first_next = &features2; - void** next = &features2.pNext; - - VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, - .pNext = nullptr, - .timelineSemaphore = true, - }; - SetNext(next, timeline_semaphore); - - VkPhysicalDevice16BitStorageFeatures bit16_storage{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, - .pNext = nullptr, - .storageBuffer16BitAccess = true, - .uniformAndStorageBuffer16BitAccess = true, - .storagePushConstant16 = false, - .storageInputOutput16 = false, - }; - SetNext(next, bit16_storage); - - 
VkPhysicalDevice8BitStorageFeatures bit8_storage{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES, - .pNext = nullptr, - .storageBuffer8BitAccess = true, - .uniformAndStorageBuffer8BitAccess = true, - .storagePushConstant8 = false, - }; - SetNext(next, bit8_storage); - - VkPhysicalDeviceRobustness2FeaturesEXT robustness2{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, - .pNext = nullptr, - .robustBufferAccess2 = true, - .robustImageAccess2 = true, - .nullDescriptor = true, - }; - SetNext(next, robustness2); - - VkPhysicalDeviceHostQueryResetFeatures host_query_reset{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, - .pNext = nullptr, - .hostQueryReset = true, - }; - SetNext(next, host_query_reset); - - VkPhysicalDeviceVariablePointerFeatures variable_pointers{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, - .pNext = nullptr, - .variablePointersStorageBuffer = VK_TRUE, - .variablePointers = VK_TRUE, - }; - SetNext(next, variable_pointers); - - VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES, - .pNext = nullptr, - .shaderDemoteToHelperInvocation = true, - }; - SetNext(next, demote); - - VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES, - .pNext = nullptr, - .shaderDrawParameters = true, - }; - SetNext(next, draw_parameters); - - VkPhysicalDeviceShaderFloat16Int8Features float16_int8; - if (is_int8_supported || is_float16_supported) { - float16_int8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, - .pNext = nullptr, - .shaderFloat16 = is_float16_supported, - .shaderInt8 = is_int8_supported, - }; - SetNext(next, float16_int8); - } - if (!is_float16_supported) { - LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); - } - if 
(!is_int8_supported) { - LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively"); - } - - if (!nv_viewport_swizzle) { - LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); - } - - if (!nv_viewport_array2) { - LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); - } - - if (!nv_geometry_shader_passthrough) { - LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); - } - - VkPhysicalDeviceUniformBufferStandardLayoutFeatures std430_layout; - if (khr_uniform_buffer_standard_layout) { - std430_layout = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES, - .pNext = nullptr, - .uniformBufferStandardLayout = true, - }; - SetNext(next, std430_layout); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); - } - - VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; - if (ext_index_type_uint8) { - index_type_uint8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, - .pNext = nullptr, - .indexTypeUint8 = true, - }; - SetNext(next, index_type_uint8); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); - } - - VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart; - if (is_topology_list_restart_supported || is_patch_list_restart_supported) { - primitive_topology_list_restart = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT, - .pNext = nullptr, - .primitiveTopologyListRestart = is_topology_list_restart_supported, - .primitiveTopologyPatchListRestart = is_patch_list_restart_supported, - }; - SetNext(next, primitive_topology_list_restart); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support list topology primitive restart"); - } - - VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; - if (ext_transform_feedback) { - transform_feedback = { - .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, - .pNext = nullptr, - .transformFeedback = true, - .geometryStreams = true, - }; - SetNext(next, transform_feedback); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); - } - - VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border; - if (ext_custom_border_color) { - custom_border = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, - .pNext = nullptr, - .customBorderColors = VK_TRUE, - .customBorderColorWithoutFormat = VK_TRUE, - }; - SetNext(next, custom_border); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); - } - - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - if (ext_extended_dynamic_state) { - dynamic_state = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState = VK_TRUE, - }; - SetNext(next, dynamic_state); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); - } - - VkPhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state_2; - if (ext_extended_dynamic_state_2) { - dynamic_state_2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState2 = VK_TRUE, - .extendedDynamicState2LogicOp = ext_extended_dynamic_state_2_extra ? 
VK_TRUE : VK_FALSE, - .extendedDynamicState2PatchControlPoints = VK_FALSE, - }; - SetNext(next, dynamic_state_2); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 2"); - } - VkPhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3; - if (ext_extended_dynamic_state_3) { - dynamic_state_3 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState3TessellationDomainOrigin = VK_FALSE, - .extendedDynamicState3DepthClampEnable = - ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3PolygonMode = VK_FALSE, - .extendedDynamicState3RasterizationSamples = VK_FALSE, - .extendedDynamicState3SampleMask = VK_FALSE, - .extendedDynamicState3AlphaToCoverageEnable = VK_FALSE, - .extendedDynamicState3AlphaToOneEnable = VK_FALSE, - .extendedDynamicState3LogicOpEnable = - ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEnable = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEquation = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorWriteMask = - ext_extended_dynamic_state_3_blend ? 
VK_TRUE : VK_FALSE, - .extendedDynamicState3RasterizationStream = VK_FALSE, - .extendedDynamicState3ConservativeRasterizationMode = VK_FALSE, - .extendedDynamicState3ExtraPrimitiveOverestimationSize = VK_FALSE, - .extendedDynamicState3DepthClipEnable = VK_FALSE, - .extendedDynamicState3SampleLocationsEnable = VK_FALSE, - .extendedDynamicState3ColorBlendAdvanced = VK_FALSE, - .extendedDynamicState3ProvokingVertexMode = VK_FALSE, - .extendedDynamicState3LineRasterizationMode = VK_FALSE, - .extendedDynamicState3LineStippleEnable = VK_FALSE, - .extendedDynamicState3DepthClipNegativeOneToOne = VK_FALSE, - .extendedDynamicState3ViewportWScalingEnable = VK_FALSE, - .extendedDynamicState3ViewportSwizzle = VK_FALSE, - .extendedDynamicState3CoverageToColorEnable = VK_FALSE, - .extendedDynamicState3CoverageToColorLocation = VK_FALSE, - .extendedDynamicState3CoverageModulationMode = VK_FALSE, - .extendedDynamicState3CoverageModulationTableEnable = VK_FALSE, - .extendedDynamicState3CoverageModulationTable = VK_FALSE, - .extendedDynamicState3CoverageReductionMode = VK_FALSE, - .extendedDynamicState3RepresentativeFragmentTestEnable = VK_FALSE, - .extendedDynamicState3ShadingRateImageEnable = VK_FALSE, - }; - SetNext(next, dynamic_state_3); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 3"); - } - - VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; - if (ext_line_rasterization) { - line_raster = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, - .pNext = nullptr, - .rectangularLines = VK_TRUE, - .bresenhamLines = VK_FALSE, - .smoothLines = VK_TRUE, - .stippledRectangularLines = VK_FALSE, - .stippledBresenhamLines = VK_FALSE, - .stippledSmoothLines = VK_FALSE, - }; - SetNext(next, line_raster); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); - } - - if (!ext_conservative_rasterization) { - LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); - } - - 
VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; - if (ext_provoking_vertex) { - provoking_vertex = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, - .pNext = nullptr, - .provokingVertexLast = VK_TRUE, - .transformFeedbackPreservesProvokingVertex = VK_TRUE, - }; - SetNext(next, provoking_vertex); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); - } - - VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; - if (ext_vertex_input_dynamic_state) { - vertex_input_dynamic = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, - .pNext = nullptr, - .vertexInputDynamicState = VK_TRUE, - }; - SetNext(next, vertex_input_dynamic); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); - } - - VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; - if (ext_shader_atomic_int64) { - atomic_int64 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES, - .pNext = nullptr, - .shaderBufferInt64Atomics = VK_TRUE, - .shaderSharedInt64Atomics = VK_TRUE, - }; - SetNext(next, atomic_int64); - } - - VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; - if (khr_workgroup_memory_explicit_layout && is_shader_int16_supported) { - workgroup_layout = { - .sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, - .pNext = nullptr, - .workgroupMemoryExplicitLayout = VK_TRUE, - .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, - .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, - .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, - }; - SetNext(next, workgroup_layout); - } else if (khr_workgroup_memory_explicit_layout) { - // TODO(lat9nq): Find a proper fix for this - LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_workgroup_memory_explicit_layout due to a " - "yuzu bug when host driver does not support 16-bit integers"); - 
khr_workgroup_memory_explicit_layout = false; - } - - VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties; - if (khr_pipeline_executable_properties) { - LOG_INFO(Render_Vulkan, "Enabling shader feedback, expect slower shader build times"); - executable_properties = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR, - .pNext = nullptr, - .pipelineExecutableInfo = VK_TRUE, - }; - SetNext(next, executable_properties); - } - - if (!ext_depth_range_unrestricted) { - LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); - } - - VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; - if (ext_depth_clip_control) { - depth_clip_control_features = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT, - .pNext = nullptr, - .depthClipControl = VK_TRUE, - }; - SetNext(next, depth_clip_control_features); - } + // GetSuitability has already configured the linked list of features for us. + // Reuse it here. 
+ const void* first_next = &features2; - VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { + VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv{}; + if (Settings::values.enable_nsight_aftermath && extensions.device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); diagnostics_nv = { @@ -744,33 +321,48 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; first_next = &diagnostics_nv; } - logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); - is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; - is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; - is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || - properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; + is_blit_depth_stencil_supported = TestDepthStencilBlits(); + is_optimal_astc_supported = ComputeIsOptimalAstcSupported(); + is_warp_potentially_bigger = !extensions.subgroup_size_control || + properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize; + + is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; + is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; + is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || + properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; + + supports_d24_depth = + IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); CollectPhysicalMemoryInfo(); - CollectTelemetryParameters(); CollectToolingInfo(); - if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { - const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff; + const VkDriverId driver_id = properties.driver.driverID; + const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; + 
const bool is_amd_driver = + driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; + const bool is_amd = is_amd_driver || is_radv; + const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; + const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; + const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY; + if (is_nvidia) { + const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; const auto arch = GetNvidiaArchitecture(physical, supported_extensions); switch (arch) { case NvidiaArchitecture::AmpereOrNewer: - LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); - is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); + features.shader_float16_int8.shaderFloat16 = false; break; case NvidiaArchitecture::Turing: break; case NvidiaArchitecture::VoltaOrOlder: if (nv_major_version < 527) { - LOG_WARNING(Render_Vulkan, - "Blacklisting Volta and older from VK_KHR_push_descriptor"); - khr_push_descriptor = false; + LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); + extensions.push_descriptor = false; + loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } break; } @@ -779,75 +371,75 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR cant_blit_msaa = true; } } - const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; - if (ext_extended_dynamic_state && is_radv) { + if (extensions.extended_dynamic_state && is_radv) { // Mask driver version variant - const u32 version = (properties.driverVersion << 3) >> 3; + const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { LOG_WARNING(Render_Vulkan, "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); - ext_extended_dynamic_state = false; + extensions.extended_dynamic_state = false; + 
loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); } } - if (ext_vertex_input_dynamic_state && is_radv) { + if (extensions.extended_dynamic_state2 && is_radv) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) { + LOG_WARNING( + Render_Vulkan, + "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2"); + features.extended_dynamic_state2.extendedDynamicState2 = false; + features.extended_dynamic_state2.extendedDynamicState2LogicOp = false; + features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints = false; + extensions.extended_dynamic_state2 = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); + } + } + if (extensions.vertex_input_dynamic_state && is_radv) { // TODO(ameerj): Blacklist only offending driver versions // TODO(ameerj): Confirm if RDNA1 is affected const bool is_rdna2 = - IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); + supported_extensions.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); if (is_rdna2) { LOG_WARNING(Render_Vulkan, "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware"); - ext_vertex_input_dynamic_state = false; + extensions.vertex_input_dynamic_state = false; + loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); } } - if (ext_extended_dynamic_state_2 && is_radv) { - const u32 version = (properties.driverVersion << 3) >> 3; - if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) { - LOG_WARNING( - Render_Vulkan, - "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2"); - ext_extended_dynamic_state_2 = false; - ext_extended_dynamic_state_2_extra = false; - } - } - sets_per_pool = 64; - const bool is_amd = - driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; - if (is_amd) { + sets_per_pool = 64; + if (is_amd_driver) { // AMD drivers need a 
higher amount of Sets per Pool in certain circunstances like in XC2. sets_per_pool = 96; // Disable VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT on AMD GCN4 and lower as it is broken. - if (!is_float16_supported) { - LOG_WARNING( - Render_Vulkan, - "AMD GCN4 and earlier do not properly support VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); + if (!features.shader_float16_int8.shaderFloat16) { + LOG_WARNING(Render_Vulkan, + "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); has_broken_cube_compatibility = true; } } - const bool is_amd_or_radv = is_amd || is_radv; - if (ext_sampler_filter_minmax && is_amd_or_radv) { + if (extensions.sampler_filter_minmax && is_amd) { // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. - if (!is_float16_supported) { + if (!features.shader_float16_int8.shaderFloat16) { LOG_WARNING(Render_Vulkan, - "Blacklisting AMD GCN4 and earlier for VK_EXT_sampler_filter_minmax"); - ext_sampler_filter_minmax = false; + "AMD GCN4 and earlier have broken VK_EXT_sampler_filter_minmax"); + extensions.sampler_filter_minmax = false; + loaded_extensions.erase(VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME); } } - const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; - const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; - if (ext_vertex_input_dynamic_state && is_intel_windows) { - const u32 version = (properties.driverVersion << 3) >> 3; + if (extensions.vertex_input_dynamic_state && is_intel_windows) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) { - LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); - ext_vertex_input_dynamic_state = false; + LOG_WARNING(Render_Vulkan, "Intel has broken VK_EXT_vertex_input_dynamic_state"); + extensions.vertex_input_dynamic_state = false; + loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); } } - if 
(is_float16_supported && is_intel_windows) { + if (features.shader_float16_int8.shaderFloat16 && is_intel_windows) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Intel has broken float16 math"); + features.shader_float16_int8.shaderFloat16 = false; } if (is_intel_windows) { LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); @@ -858,9 +450,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR must_emulate_bgr565 = true; } - supports_d24_depth = - IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); + logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions), + first_next, dld); graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); @@ -915,7 +506,7 @@ void Device::SaveShader(std::span<const u32> spirv) const { } } -bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { +bool Device::ComputeIsOptimalAstcSupported() const { // Disable for now to avoid converting ASTC twice. 
static constexpr std::array astc_formats = { VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, @@ -933,7 +524,7 @@ bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) co VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, }; - if (!features.textureCompressionASTC_LDR) { + if (!features.features.textureCompressionASTC_LDR) { return false; } const auto format_feature_usage{ @@ -971,7 +562,7 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want } std::string Device::GetDriverName() const { - switch (driver_id) { + switch (properties.driver.driverID) { case VK_DRIVER_ID_AMD_PROPRIETARY: return "AMD"; case VK_DRIVER_ID_AMD_OPEN_SOURCE: @@ -987,510 +578,336 @@ std::string Device::GetDriverName() const { case VK_DRIVER_ID_MESA_LLVMPIPE: return "LAVAPIPE"; default: - return vendor_name; + return properties.driver.driverName; } } -static std::vector<const char*> ExtensionsRequiredForInstanceVersion(u32 available_version) { - std::vector<const char*> extensions{REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()}; +bool Device::ShouldBoostClocks() const { + const auto driver_id = properties.driver.driverID; + const auto vendor_id = properties.properties.vendorID; + const auto device_id = properties.properties.deviceID; - if (available_version < VK_API_VERSION_1_2) { - extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_2.begin(), - REQUIRED_EXTENSIONS_BEFORE_1_2.end()); - } + const bool validated_driver = + driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || + driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY || + driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || + driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; - if (available_version < VK_API_VERSION_1_3) { - extensions.insert(extensions.end(), 
REQUIRED_EXTENSIONS_BEFORE_1_3.begin(), - REQUIRED_EXTENSIONS_BEFORE_1_3.end()); - } + const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; - return extensions; + return validated_driver && !is_steam_deck; } -void Device::CheckSuitability(bool requires_swapchain) const { - std::vector<const char*> required_extensions = - ExtensionsRequiredForInstanceVersion(instance_version); - std::vector<const char*> available_extensions; +bool Device::GetSuitability(bool requires_swapchain) { + // Assume we will be suitable. + bool suitable = true; - if (requires_swapchain) { - required_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); - } + // Configure properties. + properties.properties = physical.GetProperties(); + + // Set instance version. + instance_version = properties.properties.apiVersion; + // Minimum of API version 1.1 is required. (This is well-supported.) + ASSERT(instance_version >= VK_API_VERSION_1_1); + + // Get available extensions. auto extension_properties = physical.EnumerateDeviceExtensionProperties(); + // Get the set of supported extensions. + supported_extensions.clear(); for (const VkExtensionProperties& property : extension_properties) { - available_extensions.push_back(property.extensionName); + supported_extensions.insert(property.extensionName); } - bool has_all_required_extensions = true; - for (const char* requirement_name : required_extensions) { - const bool found = - std::ranges::any_of(available_extensions, [&](const char* extension_name) { - return std::strcmp(requirement_name, extension_name) == 0; - }); + // Generate list of extensions to load. 
+ loaded_extensions.clear(); - if (!found) { - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", requirement_name); - has_all_required_extensions = false; - } +#define EXTENSION(prefix, macro_name, var_name) \ + if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \ + loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \ + extensions.var_name = true; \ } - - if (!has_all_required_extensions) { - throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); +#define FEATURE_EXTENSION(prefix, struct_name, macro_name, var_name) \ + if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \ + loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \ + extensions.var_name = true; \ } - struct LimitTuple { - u32 minimum; - u32 value; - const char* name; - }; - const VkPhysicalDeviceLimits& limits{properties.limits}; - const std::array limits_report{ - LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, - LimitTuple{16, limits.maxViewports, "maxViewports"}, - LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, - LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, - }; - for (const auto& tuple : limits_report) { - if (tuple.value < tuple.minimum) { - LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, - tuple.minimum, tuple.value); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } + if (instance_version < VK_API_VERSION_1_2) { + FOR_EACH_VK_FEATURE_1_2(FEATURE_EXTENSION); + } + if (instance_version < VK_API_VERSION_1_3) { + FOR_EACH_VK_FEATURE_1_3(FEATURE_EXTENSION); } - VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{}; - demote.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES; - demote.pNext = nullptr; - VkPhysicalDeviceVariablePointerFeatures variable_pointers{}; - variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES; - 
variable_pointers.pNext = &demote; + FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION); + FOR_EACH_VK_EXTENSION(EXTENSION); +#ifdef _WIN32 + FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); +#endif - VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; - robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = &variable_pointers; +#undef FEATURE_EXTENSION +#undef EXTENSION - VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{}; - timeline_semaphore.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES; - timeline_semaphore.pNext = &robustness2; + // Some extensions are mandatory. Check those. +#define CHECK_EXTENSION(extension_name) \ + if (!loaded_extensions.contains(extension_name)) { \ + LOG_ERROR(Render_Vulkan, "Missing required extension {}", extension_name); \ + suitable = false; \ + } - VkPhysicalDevice16BitStorageFeatures bit16_storage{}; - bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; - bit16_storage.pNext = &timeline_semaphore; +#define LOG_EXTENSION(extension_name) \ + if (!loaded_extensions.contains(extension_name)) { \ + LOG_INFO(Render_Vulkan, "Device doesn't support extension {}", extension_name); \ + } - VkPhysicalDevice8BitStorageFeatures bit8_storage{}; - bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; - bit8_storage.pNext = &bit16_storage; + FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION); + FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION); +#ifdef _WIN32 + FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION); +#else + FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION); +#endif - VkPhysicalDeviceHostQueryResetFeatures host_query_reset{}; - host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES; - host_query_reset.pNext = &bit8_storage; + if (requires_swapchain) { + CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + } - VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{}; - 
draw_parameters.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES; - draw_parameters.pNext = &host_query_reset; +#undef LOG_EXTENSION +#undef CHECK_EXTENSION - VkPhysicalDeviceFeatures2 features2{}; + // Generate the linked list of features to test. features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - features2.pNext = &draw_parameters; - physical.GetFeatures2(features2); + // Set next pointer. + void** next = &features2.pNext; - const VkPhysicalDeviceFeatures& features{features2.features}; - std::array feature_report{ - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), - std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.imageCubeArray, "imageCubeArray"), - std::make_pair(features.independentBlend, "independentBlend"), - std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"), - std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"), - std::make_pair(features.depthClamp, "depthClamp"), - std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), - std::make_pair(features.largePoints, "largePoints"), - std::make_pair(features.multiViewport, "multiViewport"), - std::make_pair(features.depthBiasClamp, "depthBiasClamp"), - std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), - std::make_pair(features.wideLines, "wideLines"), - std::make_pair(features.geometryShader, "geometryShader"), - std::make_pair(features.tessellationShader, "tessellationShader"), - std::make_pair(features.sampleRateShading, "sampleRateShading"), - std::make_pair(features.dualSrcBlend, "dualSrcBlend"), - std::make_pair(features.logicOp, "logicOp"), - std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), - std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), - std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), - 
std::make_pair(features.shaderStorageImageWriteWithoutFormat, - "shaderStorageImageWriteWithoutFormat"), - std::make_pair(features.shaderClipDistance, "shaderClipDistance"), - std::make_pair(features.shaderCullDistance, "shaderCullDistance"), - std::make_pair(variable_pointers.variablePointers, "variablePointers"), - std::make_pair(variable_pointers.variablePointersStorageBuffer, - "variablePointersStorageBuffer"), - std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), - std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), - std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), - std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), - std::make_pair(timeline_semaphore.timelineSemaphore, "timelineSemaphore"), - std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"), - std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess, - "uniformAndStorageBuffer16BitAccess"), - std::make_pair(bit8_storage.storageBuffer8BitAccess, "storageBuffer8BitAccess"), - std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess, - "uniformAndStorageBuffer8BitAccess"), - std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"), - std::make_pair(draw_parameters.shaderDrawParameters, "shaderDrawParameters"), - }; + // Test all features we know about. If the feature is not available in core at our + // current API version, and was not enabled by an extension, skip testing the feature. + // We set the structure sType explicitly here as it is zeroed by the constructor. 
+#define FEATURE(prefix, struct_name, macro_name, var_name) \ + features.var_name.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES; \ + SetNext(next, features.var_name); - bool has_all_required_features = true; - for (const auto& [is_supported, name] : feature_report) { - if (!is_supported) { - LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); - has_all_required_features = false; - } +#define EXT_FEATURE(prefix, struct_name, macro_name, var_name) \ + if (extensions.var_name) { \ + features.var_name.sType = \ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES_##prefix; \ + SetNext(next, features.var_name); \ } - if (!has_all_required_features) { - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + FOR_EACH_VK_FEATURE_1_1(FEATURE); + FOR_EACH_VK_FEATURE_EXT(EXT_FEATURE); + if (instance_version >= VK_API_VERSION_1_2) { + FOR_EACH_VK_FEATURE_1_2(FEATURE); + } else { + FOR_EACH_VK_FEATURE_1_2(EXT_FEATURE); } -} - -std::vector<const char*> Device::LoadExtensions(bool requires_surface) { - std::vector<const char*> extensions = ExtensionsRequiredForInstanceVersion(instance_version); - if (requires_surface) { - extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if (instance_version >= VK_API_VERSION_1_3) { + FOR_EACH_VK_FEATURE_1_3(FEATURE); + } else { + FOR_EACH_VK_FEATURE_1_3(EXT_FEATURE); } - bool has_khr_shader_float16_int8{}; - bool has_khr_workgroup_memory_explicit_layout{}; - bool has_khr_pipeline_executable_properties{}; - bool has_khr_image_format_list{}; - bool has_khr_swapchain_mutable_format{}; - bool has_ext_subgroup_size_control{}; - bool has_ext_transform_feedback{}; - bool has_ext_custom_border_color{}; - bool has_ext_extended_dynamic_state{}; - bool has_ext_extended_dynamic_state_2{}; - bool has_ext_extended_dynamic_state_3{}; - bool has_ext_shader_atomic_int64{}; - bool has_ext_provoking_vertex{}; - bool has_ext_vertex_input_dynamic_state{}; - bool has_ext_line_rasterization{}; - bool 
has_ext_primitive_topology_list_restart{}; - bool has_ext_depth_clip_control{}; - for (const std::string& extension : supported_extensions) { - const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, - bool push) { - if (extension != name) { - return; - } - if (push) { - extensions.push_back(name); - } - if (status) { - status->get() = true; - } - }; - test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); - test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); - test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, - true); - test(khr_uniform_buffer_standard_layout, - VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); - test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); - test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); - test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); - test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true); - test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); - test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); - test(has_ext_primitive_topology_list_restart, - VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME, true); - test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); - test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, - true); - test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); - test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); - test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, - true); - test(has_ext_depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME, false); - test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); - test(has_ext_custom_border_color, 
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); - test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); - test(has_ext_extended_dynamic_state_2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME, - false); - test(has_ext_extended_dynamic_state_3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME, - false); - test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, true); - test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); - test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, - false); - test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); - test(has_khr_workgroup_memory_explicit_layout, - VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); - test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, - false); - test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); - test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true); - if (Settings::values.enable_nsight_aftermath) { - test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, - true); - } - if (Settings::values.renderer_shader_feedback) { - test(has_khr_pipeline_executable_properties, - VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false); - } - } - VkPhysicalDeviceFeatures2 features{}; - features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - - VkPhysicalDeviceProperties2 physical_properties{}; - physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - - if (has_khr_shader_float16_int8) { - VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features; - float16_int8_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; - float16_int8_features.pNext = nullptr; - features.pNext = &float16_int8_features; 
- - physical.GetFeatures2(features); - is_float16_supported = float16_int8_features.shaderFloat16; - is_int8_supported = float16_int8_features.shaderInt8; - extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); - } - if (has_ext_subgroup_size_control) { - VkPhysicalDeviceSubgroupSizeControlFeatures subgroup_features; - subgroup_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES; - subgroup_features.pNext = nullptr; - features.pNext = &subgroup_features; - physical.GetFeatures2(features); - - VkPhysicalDeviceSubgroupSizeControlProperties subgroup_properties; - subgroup_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; - subgroup_properties.pNext = nullptr; - physical_properties.pNext = &subgroup_properties; - physical.GetProperties2(physical_properties); +#undef EXT_FEATURE +#undef FEATURE - is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; + // Perform the feature test. + physical.GetFeatures2(features2); + features.features = features2.features; - if (subgroup_features.subgroupSizeControl && - subgroup_properties.minSubgroupSize <= GuestWarpSize && - subgroup_properties.maxSubgroupSize >= GuestWarpSize) { - extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; - ext_subgroup_size_control = true; - } - } else { - is_warp_potentially_bigger = true; + // Some features are mandatory. Check those. 
+#define CHECK_FEATURE(feature, name) \ + if (!features.feature.name) { \ + LOG_ERROR(Render_Vulkan, "Missing required feature {}", #name); \ + suitable = false; \ } - if (has_ext_provoking_vertex) { - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; - provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; - provoking_vertex.pNext = nullptr; - features.pNext = &provoking_vertex; - physical.GetFeatures2(features); - - if (provoking_vertex.provokingVertexLast && - provoking_vertex.transformFeedbackPreservesProvokingVertex) { - extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - ext_provoking_vertex = true; - } - } - if (has_ext_vertex_input_dynamic_state) { - VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; - vertex_input.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; - vertex_input.pNext = nullptr; - features.pNext = &vertex_input; - physical.GetFeatures2(features); - - if (vertex_input.vertexInputDynamicState) { - extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); - ext_vertex_input_dynamic_state = true; - } - } - if (has_ext_shader_atomic_int64) { - VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; - atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES; - atomic_int64.pNext = nullptr; - features.pNext = &atomic_int64; - physical.GetFeatures2(features); - - if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { - extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); - ext_shader_atomic_int64 = true; - } - } - if (has_ext_transform_feedback) { - VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; - tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; - tfb_features.pNext = nullptr; - features.pNext = &tfb_features; - physical.GetFeatures2(features); - - VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; 
- tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; - tfb_properties.pNext = nullptr; - physical_properties.pNext = &tfb_properties; - physical.GetProperties2(physical_properties); - if (tfb_features.transformFeedback && tfb_features.geometryStreams && - tfb_properties.maxTransformFeedbackStreams >= 4 && - tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && - tfb_properties.transformFeedbackDraw) { - extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); - ext_transform_feedback = true; - } - } - if (has_ext_custom_border_color) { - VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; - border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; - border_features.pNext = nullptr; - features.pNext = &border_features; - physical.GetFeatures2(features); - - if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) { - extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - ext_custom_border_color = true; - } - } - if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; - extended_dynamic_state.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - extended_dynamic_state.pNext = nullptr; - features.pNext = &extended_dynamic_state; - physical.GetFeatures2(features); - - if (extended_dynamic_state.extendedDynamicState) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); - ext_extended_dynamic_state = true; - } - } - if (has_ext_extended_dynamic_state_2) { - VkPhysicalDeviceExtendedDynamicState2FeaturesEXT extended_dynamic_state_2; - extended_dynamic_state_2.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT; - extended_dynamic_state_2.pNext = nullptr; - features.pNext = &extended_dynamic_state_2; - physical.GetFeatures2(features); - - if 
(extended_dynamic_state_2.extendedDynamicState2) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - ext_extended_dynamic_state_2 = true; - ext_extended_dynamic_state_2_extra = - extended_dynamic_state_2.extendedDynamicState2LogicOp; - } +#define LOG_FEATURE(feature, name) \ + if (!features.feature.name) { \ + LOG_INFO(Render_Vulkan, "Device doesn't support feature {}", #name); \ } - if (has_ext_extended_dynamic_state_3) { - VkPhysicalDeviceExtendedDynamicState3FeaturesEXT extended_dynamic_state_3; - extended_dynamic_state_3.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT; - extended_dynamic_state_3.pNext = nullptr; - features.pNext = &extended_dynamic_state_3; - physical.GetFeatures2(features); - - ext_extended_dynamic_state_3_blend = - extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && - extended_dynamic_state_3.extendedDynamicState3ColorBlendEquation && - extended_dynamic_state_3.extendedDynamicState3ColorWriteMask; - - ext_extended_dynamic_state_3_enables = - extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && - extended_dynamic_state_3.extendedDynamicState3LogicOpEnable; - - ext_extended_dynamic_state_3 = - ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; - if (ext_extended_dynamic_state_3) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - } + + FOR_EACH_VK_RECOMMENDED_FEATURE(LOG_FEATURE); + FOR_EACH_VK_MANDATORY_FEATURE(CHECK_FEATURE); + +#undef LOG_FEATURE +#undef CHECK_FEATURE + + // Generate linked list of properties. + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + + // Set next pointer. + next = &properties2.pNext; + + // Get driver info. + properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + SetNext(next, properties.driver); + + // Retrieve relevant extension properties. 
+ if (extensions.shader_float_controls) { + properties.float_controls.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; + SetNext(next, properties.float_controls); } - if (has_ext_line_rasterization) { - VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; - line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; - line_raster.pNext = nullptr; - features.pNext = &line_raster; - physical.GetFeatures2(features); - if (line_raster.rectangularLines && line_raster.smoothLines) { - extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); - ext_line_rasterization = true; - } + if (extensions.push_descriptor) { + properties.push_descriptor.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + SetNext(next, properties.push_descriptor); } - if (has_ext_depth_clip_control) { - VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; - depth_clip_control_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT; - depth_clip_control_features.pNext = nullptr; - features.pNext = &depth_clip_control_features; - physical.GetFeatures2(features); - - if (depth_clip_control_features.depthClipControl) { - extensions.push_back(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); - ext_depth_clip_control = true; - } + if (extensions.subgroup_size_control) { + properties.subgroup_size_control.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; + SetNext(next, properties.subgroup_size_control); } - if (has_khr_workgroup_memory_explicit_layout) { - VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; - layout.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; - layout.pNext = nullptr; - features.pNext = &layout; - physical.GetFeatures2(features); - - if (layout.workgroupMemoryExplicitLayout && - layout.workgroupMemoryExplicitLayout8BitAccess && - layout.workgroupMemoryExplicitLayout16BitAccess 
&& - layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { - extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); - khr_workgroup_memory_explicit_layout = true; - } + if (extensions.transform_feedback) { + properties.transform_feedback.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + SetNext(next, properties.transform_feedback); } - if (has_khr_pipeline_executable_properties) { - VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties; - executable_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR; - executable_properties.pNext = nullptr; - features.pNext = &executable_properties; - physical.GetFeatures2(features); - - if (executable_properties.pipelineExecutableInfo) { - extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); - khr_pipeline_executable_properties = true; + + // Perform the property fetch. + physical.GetProperties2(properties2); + properties.properties = properties2.properties; + + // Unload extensions if feature support is insufficient. + RemoveUnsuitableExtensions(); + + // Check limits. 
+ struct Limit { + u32 minimum; + u32 value; + const char* name; + }; + + const VkPhysicalDeviceLimits& limits{properties.properties.limits}; + const std::array limits_report{ + Limit{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, + Limit{16, limits.maxViewports, "maxViewports"}, + Limit{8, limits.maxColorAttachments, "maxColorAttachments"}, + Limit{8, limits.maxClipDistances, "maxClipDistances"}, + }; + + for (const auto& [min, value, name] : limits_report) { + if (value < min) { + LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", name, min, value); + suitable = false; } } - if (has_ext_primitive_topology_list_restart) { - VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart{}; - primitive_topology_list_restart.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT; - primitive_topology_list_restart.pNext = nullptr; - features.pNext = &primitive_topology_list_restart; - physical.GetFeatures2(features); - - is_topology_list_restart_supported = - primitive_topology_list_restart.primitiveTopologyListRestart; - is_patch_list_restart_supported = - primitive_topology_list_restart.primitiveTopologyPatchListRestart; - } - if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { - extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); - extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); - khr_swapchain_mutable_format = true; - } - if (khr_push_descriptor) { - VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; - push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; - push_descriptor.pNext = nullptr; - physical_properties.pNext = &push_descriptor; - physical.GetProperties2(physical_properties); + // Return whether we were suitable. 
+ return suitable; +} - max_push_descriptors = push_descriptor.maxPushDescriptors; +void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) { + if (loaded_extensions.contains(extension_name) && !is_suitable) { + LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name); + loaded_extensions.erase(extension_name); } +} - has_null_descriptor = true; - - return extensions; +void Device::RemoveUnsuitableExtensions() { + // VK_EXT_custom_border_color + extensions.custom_border_color = features.custom_border_color.customBorderColors && + features.custom_border_color.customBorderColorWithoutFormat; + RemoveExtensionIfUnsuitable(extensions.custom_border_color, + VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + + // VK_EXT_depth_clip_control + extensions.depth_clip_control = features.depth_clip_control.depthClipControl; + RemoveExtensionIfUnsuitable(extensions.depth_clip_control, + VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + + // VK_EXT_extended_dynamic_state + extensions.extended_dynamic_state = features.extended_dynamic_state.extendedDynamicState; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state, + VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + + // VK_EXT_extended_dynamic_state2 + extensions.extended_dynamic_state2 = features.extended_dynamic_state2.extendedDynamicState2; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state2, + VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); + + // VK_EXT_extended_dynamic_state3 + dynamic_state3_blending = + features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable && + features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation && + features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask; + dynamic_state3_enables = + features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable && + features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable; + + extensions.extended_dynamic_state3 = 
dynamic_state3_blending || dynamic_state3_enables; + dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3; + dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state3, + VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); + + // VK_EXT_provoking_vertex + extensions.provoking_vertex = + features.provoking_vertex.provokingVertexLast && + features.provoking_vertex.transformFeedbackPreservesProvokingVertex; + RemoveExtensionIfUnsuitable(extensions.provoking_vertex, + VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + + // VK_KHR_shader_atomic_int64 + extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics && + features.shader_atomic_int64.shaderSharedInt64Atomics; + RemoveExtensionIfUnsuitable(extensions.shader_atomic_int64, + VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + + // VK_EXT_shader_demote_to_helper_invocation + extensions.shader_demote_to_helper_invocation = + features.shader_demote_to_helper_invocation.shaderDemoteToHelperInvocation; + RemoveExtensionIfUnsuitable(extensions.shader_demote_to_helper_invocation, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + + // VK_EXT_subgroup_size_control + extensions.subgroup_size_control = + features.subgroup_size_control.subgroupSizeControl && + properties.subgroup_size_control.minSubgroupSize <= GuestWarpSize && + properties.subgroup_size_control.maxSubgroupSize >= GuestWarpSize; + RemoveExtensionIfUnsuitable(extensions.subgroup_size_control, + VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); + + // VK_EXT_transform_feedback + extensions.transform_feedback = + features.transform_feedback.transformFeedback && + features.transform_feedback.geometryStreams && + properties.transform_feedback.maxTransformFeedbackStreams >= 4 && + properties.transform_feedback.maxTransformFeedbackBuffers > 0 && + properties.transform_feedback.transformFeedbackQueries && + 
properties.transform_feedback.transformFeedbackDraw; + RemoveExtensionIfUnsuitable(extensions.transform_feedback, + VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + + // VK_EXT_vertex_input_dynamic_state + extensions.vertex_input_dynamic_state = + features.vertex_input_dynamic_state.vertexInputDynamicState; + RemoveExtensionIfUnsuitable(extensions.vertex_input_dynamic_state, + VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + + // VK_KHR_pipeline_executable_properties + if (Settings::values.renderer_shader_feedback.GetValue()) { + extensions.pipeline_executable_properties = + features.pipeline_executable_properties.pipelineExecutableInfo; + RemoveExtensionIfUnsuitable(extensions.pipeline_executable_properties, + VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); + } else { + extensions.pipeline_executable_properties = false; + loaded_extensions.erase(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); + } + + // VK_KHR_workgroup_memory_explicit_layout + extensions.workgroup_memory_explicit_layout = + features.features.shaderInt16 && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout; + RemoveExtensionIfUnsuitable(extensions.workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); } void Device::SetupFamilies(VkSurfaceKHR surface) { @@ -1520,55 +937,12 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } - graphics_family = *graphics; - present_family = *present; -} - -void Device::SetupFeatures() { - const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; - is_depth_bounds_supported = features.depthBounds; - 
is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; - is_shader_float64_supported = features.shaderFloat64; - is_shader_int64_supported = features.shaderInt64; - is_shader_int16_supported = features.shaderInt16; - is_shader_storage_image_multisample = features.shaderStorageImageMultisample; - is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(features); - - const VkPhysicalDeviceLimits& limits{properties.limits}; - max_vertex_input_attributes = limits.maxVertexInputAttributes; - max_vertex_input_bindings = limits.maxVertexInputBindings; -} - -void Device::SetupProperties() { - float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; - - VkPhysicalDeviceProperties2KHR properties2{}; - properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - properties2.pNext = &float_controls; - - physical.GetProperties2(properties2); -} - -void Device::CollectTelemetryParameters() { - VkPhysicalDeviceDriverProperties driver{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, - .pNext = nullptr, - .driverID = {}, - .driverName = {}, - .driverInfo = {}, - .conformanceVersion = {}, - }; - - VkPhysicalDeviceProperties2 device_properties{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, - .pNext = &driver, - .properties = {}, - }; - physical.GetProperties2(device_properties); - - driver_id = driver.driverID; - vendor_name = driver.driverName; + if (graphics) { + graphics_family = *graphics; + } + if (present) { + present_family = *present; + } } u64 Device::GetDeviceMemoryUsage() const { @@ -1586,7 +960,8 @@ u64 Device::GetDeviceMemoryUsage() const { void Device::CollectPhysicalMemoryInfo() { VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; - const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? 
&budget : nullptr); + const auto mem_info = + physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr); const auto& mem_properties = mem_info.memoryProperties; const size_t num_properties = mem_properties.memoryHeapCount; device_access_memory = 0; @@ -1602,7 +977,7 @@ void Device::CollectPhysicalMemoryInfo() { if (is_heap_local) { local_memory += mem_properties.memoryHeaps[element].size; } - if (ext_memory_budget) { + if (extensions.memory_budget) { device_initial_usage += budget.heapUsage[element]; device_access_memory += budget.heapBudget[element]; continue; @@ -1618,7 +993,7 @@ void Device::CollectPhysicalMemoryInfo() { } void Device::CollectToolingInfo() { - if (!ext_tooling_info) { + if (!extensions.tooling_info) { return; } auto tools{physical.GetPhysicalDeviceToolProperties()}; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 6042046e1..4cfb20bc2 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -3,6 +3,7 @@ #pragma once +#include <set> #include <span> #include <string> #include <unordered_map> @@ -11,6 +12,155 @@ #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +// Define all features which may be used by the implementation here. +// Vulkan version in the macro describes the minimum version required for feature availability. +// If the Vulkan version is lower than the required version, the named extension is required. 
+#define FOR_EACH_VK_FEATURE_1_1(FEATURE) \ + FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \ + FEATURE(KHR, 16BitStorage, 16BIT_STORAGE, bit16_storage) \ + FEATURE(KHR, ShaderAtomicInt64, SHADER_ATOMIC_INT64, shader_atomic_int64) \ + FEATURE(KHR, ShaderDrawParameters, SHADER_DRAW_PARAMETERS, shader_draw_parameters) \ + FEATURE(KHR, ShaderFloat16Int8, SHADER_FLOAT16_INT8, shader_float16_int8) \ + FEATURE(KHR, UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, \ + uniform_buffer_standard_layout) \ + FEATURE(KHR, VariablePointer, VARIABLE_POINTERS, variable_pointer) + +#define FOR_EACH_VK_FEATURE_1_2(FEATURE) \ + FEATURE(EXT, HostQueryReset, HOST_QUERY_RESET, host_query_reset) \ + FEATURE(KHR, 8BitStorage, 8BIT_STORAGE, bit8_storage) \ + FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore) + +#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \ + FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \ + shader_demote_to_helper_invocation) + +// Define all features which may be used by the implementation and require an extension here. 
+#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \ + FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \ + FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \ + FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ + FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ + FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ + FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ + FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ + FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \ + primitive_topology_list_restart) \ + FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \ + FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \ + FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \ + FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \ + FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \ + pipeline_executable_properties) \ + FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \ + workgroup_memory_explicit_layout) + +// Define miscellaneous extensions which may be used by the implementation here. 
+#define FOR_EACH_VK_EXTENSION(EXTENSION) \ + EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \ + EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \ + EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \ + EXTENSION(EXT, ROBUSTNESS_2, robustness_2) \ + EXTENSION(EXT, SAMPLER_FILTER_MINMAX, sampler_filter_minmax) \ + EXTENSION(EXT, SHADER_STENCIL_EXPORT, shader_stencil_export) \ + EXTENSION(EXT, SHADER_VIEWPORT_INDEX_LAYER, shader_viewport_index_layer) \ + EXTENSION(EXT, TOOLING_INFO, tooling_info) \ + EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \ + EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \ + EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \ + EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \ + EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \ + EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ + EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \ + EXTENSION(KHR, SWAPCHAIN, swapchain) \ + EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \ + EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \ + EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \ + EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ + EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) + +#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \ + EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32) + +// Define extensions which must be supported. 
+#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME) + +#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME) + +#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME) + +// Define extensions where the absence of the extension may result in a degraded experience. +#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME) + +// Define features which must be supported. 
+#define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \ + FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \ + FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \ + FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \ + FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \ + FEATURE_NAME(features, depthBiasClamp) \ + FEATURE_NAME(features, depthClamp) \ + FEATURE_NAME(features, drawIndirectFirstInstance) \ + FEATURE_NAME(features, dualSrcBlend) \ + FEATURE_NAME(features, fillModeNonSolid) \ + FEATURE_NAME(features, fragmentStoresAndAtomics) \ + FEATURE_NAME(features, geometryShader) \ + FEATURE_NAME(features, imageCubeArray) \ + FEATURE_NAME(features, independentBlend) \ + FEATURE_NAME(features, largePoints) \ + FEATURE_NAME(features, logicOp) \ + FEATURE_NAME(features, multiDrawIndirect) \ + FEATURE_NAME(features, multiViewport) \ + FEATURE_NAME(features, occlusionQueryPrecise) \ + FEATURE_NAME(features, robustBufferAccess) \ + FEATURE_NAME(features, samplerAnisotropy) \ + FEATURE_NAME(features, sampleRateShading) \ + FEATURE_NAME(features, shaderClipDistance) \ + FEATURE_NAME(features, shaderCullDistance) \ + FEATURE_NAME(features, shaderImageGatherExtended) \ + FEATURE_NAME(features, shaderStorageImageWriteWithoutFormat) \ + FEATURE_NAME(features, tessellationShader) \ + FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \ + FEATURE_NAME(features, wideLines) \ + FEATURE_NAME(host_query_reset, hostQueryReset) \ + FEATURE_NAME(robustness2, nullDescriptor) \ + FEATURE_NAME(robustness2, robustBufferAccess2) \ + FEATURE_NAME(robustness2, robustImageAccess2) \ + FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \ + FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \ + FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ + FEATURE_NAME(variable_pointer, variablePointers) \ + FEATURE_NAME(variable_pointer, variablePointersStorageBuffer) + +// Define features where the absence of the feature may 
result in a degraded experience. +#define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \ + FEATURE_NAME(custom_border_color, customBorderColors) \ + FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \ + FEATURE_NAME(index_type_uint8, indexTypeUint8) \ + FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \ + FEATURE_NAME(provoking_vertex, provokingVertexLast) \ + FEATURE_NAME(shader_float16_int8, shaderFloat16) \ + FEATURE_NAME(shader_float16_int8, shaderInt8) \ + FEATURE_NAME(transform_feedback, transformFeedback) \ + FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \ + FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState) + namespace Vulkan { class NsightAftermathTracker; @@ -88,67 +238,69 @@ public: /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. u32 ApiVersion() const { - return properties.apiVersion; + return properties.properties.apiVersion; } /// Returns the current driver version provided in Vulkan-formatted version numbers. u32 GetDriverVersion() const { - return properties.driverVersion; + return properties.properties.driverVersion; } /// Returns the device name. std::string_view GetModelName() const { - return properties.deviceName; + return properties.properties.deviceName; } /// Returns the driver ID. VkDriverIdKHR GetDriverID() const { - return driver_id; + return properties.driver.driverID; } + bool ShouldBoostClocks() const; + /// Returns uniform buffer alignment requeriment. VkDeviceSize GetUniformBufferAlignment() const { - return properties.limits.minUniformBufferOffsetAlignment; + return properties.properties.limits.minUniformBufferOffsetAlignment; } /// Returns storage alignment requeriment. VkDeviceSize GetStorageBufferAlignment() const { - return properties.limits.minStorageBufferOffsetAlignment; + return properties.properties.limits.minStorageBufferOffsetAlignment; } /// Returns the maximum range for storage buffers. 
VkDeviceSize GetMaxStorageBufferRange() const { - return properties.limits.maxStorageBufferRange; + return properties.properties.limits.maxStorageBufferRange; } /// Returns the maximum size for push constants. VkDeviceSize GetMaxPushConstantsSize() const { - return properties.limits.maxPushConstantsSize; + return properties.properties.limits.maxPushConstantsSize; } /// Returns the maximum size for shared memory. u32 GetMaxComputeSharedMemorySize() const { - return properties.limits.maxComputeSharedMemorySize; + return properties.properties.limits.maxComputeSharedMemorySize; } /// Returns float control properties of the device. const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { - return float_controls; + return properties.float_controls; } /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { - return is_optimal_astc_supported; + return features.features.textureCompressionASTC_LDR; } /// Returns true if the device supports float16 natively. bool IsFloat16Supported() const { - return is_float16_supported; + return features.shader_float16_int8.shaderFloat16; } /// Returns true if the device supports int8 natively. bool IsInt8Supported() const { - return is_int8_supported; + return features.shader_float16_int8.shaderInt8; } /// Returns true if the device warp size can potentially be bigger than guest's warp size. @@ -158,32 +310,32 @@ public: /// Returns true if the device can be forced to use the guest warp size. bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { - return guest_warp_stages & stage; + return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; } /// Returns the maximum number of push descriptors. u32 MaxPushDescriptors() const { - return max_push_descriptors; + return properties.push_descriptor.maxPushDescriptors; } /// Returns true if formatless image load is supported. 
bool IsFormatlessImageLoadSupported() const { - return is_formatless_image_load_supported; + return features.features.shaderStorageImageReadWithoutFormat; } /// Returns true if shader int64 is supported. bool IsShaderInt64Supported() const { - return is_shader_int64_supported; + return features.features.shaderInt64; } /// Returns true if shader int16 is supported. bool IsShaderInt16Supported() const { - return is_shader_int16_supported; + return features.features.shaderInt16; } // Returns true if depth bounds is supported. bool IsDepthBoundsSupported() const { - return is_depth_bounds_supported; + return features.features.depthBounds; } /// Returns true when blitting from and to depth stencil images is supported. @@ -193,151 +345,151 @@ public: /// Returns true if the device supports VK_NV_viewport_swizzle. bool IsNvViewportSwizzleSupported() const { - return nv_viewport_swizzle; + return extensions.viewport_swizzle; } /// Returns true if the device supports VK_NV_viewport_array2. bool IsNvViewportArray2Supported() const { - return nv_viewport_array2; + return extensions.viewport_array2; } /// Returns true if the device supports VK_NV_geometry_shader_passthrough. bool IsNvGeometryShaderPassthroughSupported() const { - return nv_geometry_shader_passthrough; + return extensions.geometry_shader_passthrough; } /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { - return khr_uniform_buffer_standard_layout; + return extensions.uniform_buffer_standard_layout; } /// Returns true if the device supports VK_KHR_push_descriptor. bool IsKhrPushDescriptorSupported() const { - return khr_push_descriptor; + return extensions.push_descriptor; } /// Returns true if VK_KHR_pipeline_executable_properties is enabled. 
bool IsKhrPipelineExecutablePropertiesEnabled() const { - return khr_pipeline_executable_properties; + return extensions.pipeline_executable_properties; } /// Returns true if VK_KHR_swapchain_mutable_format is enabled. bool IsKhrSwapchainMutableFormatEnabled() const { - return khr_swapchain_mutable_format; + return extensions.swapchain_mutable_format; } /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { - return khr_workgroup_memory_explicit_layout; + return extensions.workgroup_memory_explicit_layout; } /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. bool IsTopologyListPrimitiveRestartSupported() const { - return is_topology_list_restart_supported; + return features.primitive_topology_list_restart.primitiveTopologyListRestart; } /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. bool IsPatchListPrimitiveRestartSupported() const { - return is_patch_list_restart_supported; + return features.primitive_topology_list_restart.primitiveTopologyPatchListRestart; } /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { - return ext_index_type_uint8; + return extensions.index_type_uint8; } /// Returns true if the device supports VK_EXT_sampler_filter_minmax. bool IsExtSamplerFilterMinmaxSupported() const { - return ext_sampler_filter_minmax; + return extensions.sampler_filter_minmax; } /// Returns true if the device supports VK_EXT_depth_range_unrestricted. bool IsExtDepthRangeUnrestrictedSupported() const { - return ext_depth_range_unrestricted; + return extensions.depth_range_unrestricted; } /// Returns true if the device supports VK_EXT_depth_clip_control. bool IsExtDepthClipControlSupported() const { - return ext_depth_clip_control; + return extensions.depth_clip_control; } /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. 
bool IsExtShaderViewportIndexLayerSupported() const { - return ext_shader_viewport_index_layer; + return extensions.shader_viewport_index_layer; } /// Returns true if the device supports VK_EXT_subgroup_size_control. bool IsExtSubgroupSizeControlSupported() const { - return ext_subgroup_size_control; + return extensions.subgroup_size_control; } /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { - return ext_transform_feedback; + return extensions.transform_feedback; } /// Returns true if the device supports VK_EXT_custom_border_color. bool IsExtCustomBorderColorSupported() const { - return ext_custom_border_color; + return extensions.custom_border_color; } /// Returns true if the device supports VK_EXT_extended_dynamic_state. bool IsExtExtendedDynamicStateSupported() const { - return ext_extended_dynamic_state; + return extensions.extended_dynamic_state; } /// Returns true if the device supports VK_EXT_extended_dynamic_state2. bool IsExtExtendedDynamicState2Supported() const { - return ext_extended_dynamic_state_2; + return extensions.extended_dynamic_state2; } bool IsExtExtendedDynamicState2ExtrasSupported() const { - return ext_extended_dynamic_state_2_extra; + return features.extended_dynamic_state2.extendedDynamicState2LogicOp; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3Supported() const { - return ext_extended_dynamic_state_3; + return extensions.extended_dynamic_state3; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3BlendingSupported() const { - return ext_extended_dynamic_state_3_blend; + return dynamic_state3_blending; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. 
bool IsExtExtendedDynamicState3EnablesSupported() const { - return ext_extended_dynamic_state_3_enables; + return dynamic_state3_enables; } /// Returns true if the device supports VK_EXT_line_rasterization. bool IsExtLineRasterizationSupported() const { - return ext_line_rasterization; + return extensions.line_rasterization; } /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. bool IsExtVertexInputDynamicStateSupported() const { - return ext_vertex_input_dynamic_state; + return extensions.vertex_input_dynamic_state; } /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { - return ext_shader_stencil_export; + return extensions.shader_stencil_export; } /// Returns true if the device supports VK_EXT_conservative_rasterization. bool IsExtConservativeRasterizationSupported() const { - return ext_conservative_rasterization; + return extensions.conservative_rasterization; } /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { - return ext_provoking_vertex; + return extensions.provoking_vertex; } /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { - return ext_shader_atomic_int64; + return extensions.shader_atomic_int64; } /// Returns the minimum supported version of SPIR-V. @@ -345,7 +497,7 @@ public: if (instance_version >= VK_API_VERSION_1_3) { return 0x00010600U; } - if (khr_spirv_1_4) { + if (extensions.spirv_1_4) { return 0x00010400U; } return 0x00010000U; @@ -363,11 +515,11 @@ public: /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { - return vendor_name; + return properties.driver.driverName; } /// Returns the list of available extensions. 
- const std::vector<std::string>& GetAvailableExtensions() const { + const std::set<std::string, std::less<>>& GetAvailableExtensions() const { return supported_extensions; } @@ -376,7 +528,7 @@ public: } bool CanReportMemoryUsage() const { - return ext_memory_budget; + return extensions.memory_budget; } u64 GetDeviceMemoryUsage() const; @@ -398,36 +550,29 @@ public: } bool HasNullDescriptor() const { - return has_null_descriptor; + return features.robustness2.nullDescriptor; } u32 GetMaxVertexInputAttributes() const { - return max_vertex_input_attributes; + return properties.properties.limits.maxVertexInputAttributes; } u32 GetMaxVertexInputBindings() const { - return max_vertex_input_bindings; + return properties.properties.limits.maxVertexInputBindings; } private: - /// Checks if the physical device is suitable. - void CheckSuitability(bool requires_swapchain) const; + /// Checks if the physical device is suitable and configures the object state + /// with all necessary info about its properties. + bool GetSuitability(bool requires_swapchain); - /// Loads extensions into a vector and stores available ones in this object. - std::vector<const char*> LoadExtensions(bool requires_surface); + // Remove extensions which have incomplete feature support. + void RemoveUnsuitableExtensions(); + void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name); /// Sets up queue families. void SetupFamilies(VkSurfaceKHR surface); - /// Sets up device features. - void SetupFeatures(); - - /// Sets up device properties. - void SetupProperties(); - - /// Collects telemetry information from the device. - void CollectTelemetryParameters(); - /// Collects information about attached tools. void CollectToolingInfo(); @@ -438,91 +583,93 @@ private: std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. 
- bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; + bool ComputeIsOptimalAstcSupported() const; /// Returns true if the device natively supports blitting depth stencil images. bool TestDepthStencilBlits() const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - VkDriverIdKHR driver_id{}; ///< Driver ID. - VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. - u64 device_access_memory{}; ///< Total size of device local memory in bytes. - u32 max_push_descriptors{}; ///< Maximum number of push descriptors - u32 sets_per_pool{}; ///< Sets per Description Pool - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetic. - bool is_int8_supported{}; ///< Support for int8 arithmetic. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. - bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. - bool is_depth_bounds_supported{}; ///< Support for depth bounds. - bool is_shader_float64_supported{}; ///< Support for float64. - bool is_shader_int64_supported{}; ///< Support for int64. - bool is_shader_int16_supported{}; ///< Support for int16. - bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. 
- bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. - bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list - ///< topologies. - bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. - bool is_integrated{}; ///< Is GPU an iGPU. - bool is_virtual{}; ///< Is GPU a virtual GPU. - bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. - bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. - bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. - bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count. - bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. - bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. - bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. - bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. - bool khr_pipeline_executable_properties{}; ///< Support for executable properties. - bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 
- bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. - bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. - bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. - bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. - bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - bool supports_d24_depth{}; ///< Supports D24 depth buffers. - bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. - bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. - bool has_null_descriptor{}; ///< Has support for null descriptors. - u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline - u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline +private: + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. 
+ vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan instance version. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + + struct Extensions { +#define EXTENSION(prefix, macro_name, var_name) bool var_name{}; +#define FEATURE(prefix, struct_name, macro_name, var_name) bool var_name{}; + + FOR_EACH_VK_FEATURE_1_1(FEATURE); + FOR_EACH_VK_FEATURE_1_2(FEATURE); + FOR_EACH_VK_FEATURE_1_3(FEATURE); + FOR_EACH_VK_FEATURE_EXT(FEATURE); + FOR_EACH_VK_EXTENSION(EXTENSION); + FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); + +#undef EXTENSION +#undef FEATURE + }; + + struct Features { +#define FEATURE_CORE(prefix, struct_name, macro_name, var_name) \ + VkPhysicalDevice##struct_name##Features var_name{}; +#define FEATURE_EXT(prefix, struct_name, macro_name, var_name) \ + VkPhysicalDevice##struct_name##Features##prefix var_name{}; + + FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE); + FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE); + FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE); + FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT); + +#undef FEATURE_CORE +#undef FEATURE_EXT + + VkPhysicalDeviceFeatures features{}; + }; + + struct Properties { + VkPhysicalDeviceDriverProperties driver{}; + VkPhysicalDeviceFloatControlsProperties float_controls{}; + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; + VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{}; + + VkPhysicalDeviceProperties properties{}; + }; + + Extensions extensions{}; + Features features{}; + Properties properties{}; + + VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceProperties2 properties2{}; + + // Misc features + bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats. 
+ bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + bool is_integrated{}; ///< Is GPU an iGPU. + bool is_virtual{}; ///< Is GPU a virtual GPU. + bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. + bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. + bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. + u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 sets_per_pool{}; ///< Sets per Description Pool // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. - std::vector<size_t> valid_heap_memory; ///< Heaps used. + std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. + std::set<std::string, std::less<>> loaded_extensions; ///< Loaded Vulkan extensions. + std::vector<size_t> valid_heap_memory; ///< Heaps used. /// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties; diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist index 0eb377926..f05f3186c 100644 --- a/src/yuzu/Info.plist +++ b/src/yuzu/Info.plist @@ -34,6 +34,8 @@ SPDX-License-Identifier: GPL-2.0-or-later <string></string> <key>CSResourcesFileMapped</key> <true/> + <key>LSApplicationCategoryType</key> + <string>public.app-category.games</string> <key>LSRequiresCarbon</key> <true/> <key>NSHumanReadableCopyright</key> diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index fbfa3ba35..0db62baa3 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -690,6 +690,7 @@ void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); ReadGlobalSetting(Settings::values.renderer_backend); + ReadGlobalSetting(Settings::values.renderer_force_max_clock); ReadGlobalSetting(Settings::values.vulkan_device); ReadGlobalSetting(Settings::values.fullscreen_mode); ReadGlobalSetting(Settings::values.aspect_ratio); @@ -1306,6 +1307,11 @@ void Config::SaveRendererValues() { static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), static_cast<u32>(Settings::values.renderer_backend.GetDefault()), Settings::values.renderer_backend.UsingGlobal()); + // Pass UsingGlobal() like renderer_backend above: this setting is loaded with ReadGlobalSetting. + WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()), + static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)), + static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault()), + Settings::values.renderer_force_max_clock.UsingGlobal()); WriteGlobalSetting(Settings::values.vulkan_device); WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)), diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a3fbe2ad0..fdf8485ce 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++
b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -25,6 +25,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); + ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -39,6 +40,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { Settings::values.max_anisotropy.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); + ConfigurationShared::SetPerGameSetting(ui->renderer_force_max_clock, + &Settings::values.renderer_force_max_clock); ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, &Settings::values.max_anisotropy); ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, @@ -50,6 +53,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, + ui->renderer_force_max_clock, + renderer_force_max_clock); ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); @@ -81,6 +87,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { // Disable if not global (only happens during game) if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); + ui->renderer_force_max_clock->setEnabled( + 
Settings::values.renderer_force_max_clock.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); @@ -95,6 +103,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { return; } + ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, + Settings::values.renderer_force_max_clock, + renderer_force_max_clock); ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 891efc068..df557d585 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -36,6 +36,7 @@ private: std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; + ConfigurationShared::CheckState renderer_force_max_clock; ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ccbdcf08f..061885e30 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -70,6 +70,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="renderer_force_max_clock"> + <property name="toolTip"> + <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> + </property> + <property name="text"> + <string>Force maximum clocks (Vulkan only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="use_vsync"> <property name="toolTip"> <string>VSync prevents the 
screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 4f1d5e79e..571eacf9f 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1839,9 +1839,11 @@ void GMainWindow::OnEmulationStopTimeExpired() { void GMainWindow::OnEmulationStopped() { shutdown_timer.stop(); - emu_thread->disconnect(); - emu_thread->wait(); - emu_thread = nullptr; + if (emu_thread) { + emu_thread->disconnect(); + emu_thread->wait(); + emu_thread.reset(); + } if (shutdown_dialog) { shutdown_dialog->deleteLater(); @@ -3029,6 +3031,8 @@ void GMainWindow::OnStopGame() { if (OnShutdownBegin()) { OnShutdownBeginDialog(); + } else { + OnEmulationStopped(); } } diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index b2d690bb6..527017282 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -296,6 +296,7 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); + ReadSetting("Renderer", Settings::values.renderer_force_max_clock); ReadSetting("Renderer", Settings::values.renderer_debug); ReadSetting("Renderer", Settings::values.renderer_shader_feedback); ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); |