diff options
Diffstat (limited to 'src/video_core')
47 files changed, 336 insertions, 218 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0d6bf1f0d..6ecc8dbff 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -84,6 +84,7 @@ add_library(video_core STATIC gpu_thread.h memory_manager.cpp memory_manager.h + precompiled_headers.h pte_kind.h query_cache.h rasterizer_accelerated.cpp @@ -304,3 +305,7 @@ endif() if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) target_link_libraries(video_core PRIVATE dynarmic) endif() + +if (YUZU_USE_PRECOMPILED_HEADERS) + target_precompile_headers(video_core PRIVATE precompiled_headers.h) +endif() diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index f9a6472cf..92d77eef2 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -535,7 +535,7 @@ private: const u64* const state_words = Array<type>(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; - const u64 word_end = std::min(word_begin + num_query_words, NumWords()); + const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords()); const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5d3a8293b..6881b34c4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -19,6 +19,7 @@ #include "common/literals.h" #include "common/lru_cache.h" #include "common/microprofile.h" +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "core/memory.h" #include "video_core/buffer_cache/buffer_base.h" diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index 584a0c26c..cdaf4f8d5 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h @@ -35,8 +35,6 @@ public: explicit ChannelInfo(Tegra::Control::ChannelState& state); ChannelInfo(const ChannelInfo& state) = delete; ChannelInfo& operator=(const ChannelInfo&) = delete; - ChannelInfo(ChannelInfo&& other) = default; - ChannelInfo& operator=(ChannelInfo&& other) = default; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 28aa85f32..e4f8331ab 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -49,10 +49,9 @@ void State::ProcessData(std::span<const u8> read_buffer) { if (regs.line_count == 1) { rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); } else { - for (u32 line = 0; line < regs.line_count; ++line) { - const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch; - std::span<const u8> buffer(read_buffer.data() + - static_cast<size_t>(line) * regs.line_length_in, + for (size_t line = 0; line < regs.line_count; ++line) { + const GPUVAddr dest_line = address + line * regs.dest.pitch; + std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, regs.line_length_in); rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); } diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index f08f6e36a..94fafd9dc 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -39,7 +39,7 @@ struct Registers { u32 y; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } u32 BlockWidth() const { diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 24b518cb5..100b21bac 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -97,7 +97,7 @@ public: u32 addr_lower; [[nodiscard]] constexpr GPUVAddr Address() const noexcept { - return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower); + return (GPUVAddr{addr_upper} << 32) | GPUVAddr{addr_lower}; } }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7c50bdbe0..e5c622155 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -50,11 +50,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun u32 methods_pending) { switch (method) { case KEPLER_COMPUTE_REG_INDEX(data_upload): - upload_state.ProcessData(base_start, static_cast<size_t>(amount)); + upload_state.ProcessData(base_start, amount); return; default: - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); + for (u32 i = 0; i < amount; i++) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } break; } diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index aab309ecc..e154e3f06 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -68,7 +68,7 @@ public: struct { u32 address; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8)); + return GPUVAddr{address} << 8; } } launch_desc_loc; @@ -83,8 +83,7 @@ public: u32 address_low; u32 limit; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } } tsc; @@ -95,8 +94,7 @@ public: u32 address_low; u32 limit; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } } tic; @@ -106,8 +104,7 @@ public: u32 address_high; u32 address_low; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } } code_loc; @@ -162,8 +159,7 @@ public: BitField<15, 17, u32> size; }; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | - address_low); + return (GPUVAddr{address_high.Value()} << 32) | GPUVAddr{address_low}; } }; std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index a3fbab1e5..08045d1cf 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -42,11 +42,11 @@ void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount u32 methods_pending) { switch (method) { case KEPLERMEMORY_REG_INDEX(data): - upload_state.ProcessData(base_start, static_cast<size_t>(amount)); + upload_state.ProcessData(base_start, amount); return; default: - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); + for (u32 i = 0; i < amount; i++) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } break; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 212427b8b..34bbc72cf 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -126,7 +126,6 @@ void Maxwell3D::InitializeRegisterDefaults() { draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true; draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true; draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true; - draw_command[MAXWELL3D_REG_INDEX(draw.instance_id)] = true; } void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { @@ -218,16 +217,19 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume regs.index_buffer.count = regs.index_buffer32_first.count; regs.index_buffer.first = regs.index_buffer32_first.first; dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + draw_indexed = true; return ProcessDraw(); case MAXWELL3D_REG_INDEX(index_buffer16_first): regs.index_buffer.count = regs.index_buffer16_first.count; regs.index_buffer.first = regs.index_buffer16_first.first; dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + draw_indexed = true; return ProcessDraw(); case MAXWELL3D_REG_INDEX(index_buffer8_first): regs.index_buffer.count = regs.index_buffer8_first.count; regs.index_buffer.first = regs.index_buffer8_first.first; dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + draw_indexed = true; return ProcessDraw(); case MAXWELL3D_REG_INDEX(topology_override): use_topology_override = true; @@ -300,21 +302,33 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { draw_mode = DrawMode::InlineIndex; }; switch (method) { + case MAXWELL3D_REG_INDEX(draw.begin): { + draw_mode = + (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || + (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) + ? DrawMode::Instance + : DrawMode::General; + break; + } case MAXWELL3D_REG_INDEX(draw.end): switch (draw_mode) { case DrawMode::General: - ProcessDraw(1); + ProcessDraw(); break; case DrawMode::InlineIndex: regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4); regs.index_buffer.format = Regs::IndexFormat::UnsignedInt; - ProcessDraw(1); + draw_indexed = true; + ProcessDraw(); inline_index_draw_indexes.clear(); break; case DrawMode::Instance: break; } break; + case MAXWELL3D_REG_INDEX(index_buffer.count): + draw_indexed = true; + break; case MAXWELL3D_REG_INDEX(draw_inline_index): update_inline_index(method_argument); break; @@ -328,13 +342,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { update_inline_index(regs.inline_index_4x8.index2); update_inline_index(regs.inline_index_4x8.index3); break; - case MAXWELL3D_REG_INDEX(draw.instance_id): - draw_mode = - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) - ? DrawMode::Instance - : DrawMode::General; - break; } } else { ProcessDeferredDraw(); @@ -370,11 +377,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, ProcessCBMultiData(base_start, amount); break; case MAXWELL3D_REG_INDEX(inline_data): - upload_state.ProcessData(base_start, static_cast<size_t>(amount)); + upload_state.ProcessData(base_start, amount); return; default: - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); + for (u32 i = 0; i < amount; i++) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } break; } @@ -624,27 +631,16 @@ void Maxwell3D::ProcessClearBuffers(u32 layer_count) { void Maxwell3D::ProcessDraw(u32 instance_count) { LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); - - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - - // Both instance configuration registers can not be set at the same time. - ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); + draw_indexed ? regs.index_buffer.count : regs.vertex_buffer.count); ProcessTopologyOverride(); - const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count; if (ShouldExecute()) { - rasterizer->Draw(is_indexed, instance_count); + rasterizer->Draw(draw_indexed, instance_count); } - if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; - } + draw_indexed = false; + deferred_draw_method.clear(); } void Maxwell3D::ProcessDeferredDraw() { @@ -652,12 +648,12 @@ void Maxwell3D::ProcessDeferredDraw() { return; } - u32 method_count = static_cast<u32>(deferred_draw_method.size()); + const auto method_count = deferred_draw_method.size(); u32 instance_count = 1; u32 vertex_buffer_count = 0; u32 index_buffer_count = 0; - for (u32 index = 0; index < method_count; ++index) { - u32 method = deferred_draw_method[index]; + for (size_t index = 0; index < method_count; ++index) { + const u32 method = deferred_draw_method[index]; if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) { instance_count = ++vertex_buffer_count; } else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) { @@ -667,8 +663,6 @@ void Maxwell3D::ProcessDeferredDraw() { ASSERT_MSG(!(vertex_buffer_count && index_buffer_count), "Instance both indexed and direct?"); ProcessDraw(instance_count); - - deferred_draw_method.clear(); } } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 84c497ebd..a541cd95f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -96,8 +96,7 @@ public: u32 type; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -106,8 +105,7 @@ public: u32 address_low; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -124,8 +122,7 @@ public: Mode mode; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(offset_high) << 32) | - offset_low); + return (GPUVAddr{offset_high} << 32) | GPUVAddr{offset_low}; } }; @@ -187,7 +184,7 @@ public: default: // Thresholds begin at 0x10 (1 << 4) // Threshold is in the range 0x1 to 0x13 - return 1 << (4 + threshold.Value() - 1); + return 1U << (4 + threshold.Value() - 1); } } }; @@ -468,8 +465,7 @@ public: INSERT_PADDING_BYTES_NOINIT(0xC); GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; static_assert(sizeof(Buffer) == 0x20); @@ -511,12 +507,11 @@ public: u32 default_size_per_warp; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } u64 Size() const { - return (static_cast<u64>(size_high) << 32) | size_low; + return (u64{size_high} << 32) | u64{size_low}; } }; @@ -538,13 +533,11 @@ public: u32 storage_limit_address_low; GPUVAddr StorageAddress() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(storage_address_high) << 32) | - storage_address_low); + return (GPUVAddr{storage_address_high} << 32) | GPUVAddr{storage_address_low}; } GPUVAddr StorageLimitAddress() const { - return static_cast<GPUVAddr>( - (static_cast<GPUVAddr>(storage_limit_address_high) << 32) | - storage_limit_address_low); + return (GPUVAddr{storage_limit_address_high} << 32) | + GPUVAddr{storage_limit_address_low}; } }; @@ -829,11 +822,11 @@ public: struct CompressionThresholdSamples { u32 samples; - u32 Samples() { + u32 Samples() const { if (samples == 0) { return 0; } - return 1 << (samples - 1); + return 1U << (samples - 1); } }; @@ -1138,8 +1131,7 @@ public: INSERT_PADDING_BYTES_NOINIT(0x18); GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; static_assert(sizeof(RenderTargetConfig) == 0x40); @@ -1482,8 +1474,7 @@ public: u32 address_low; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -1533,8 +1524,7 @@ public: u32 address_low; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -1561,8 +1551,7 @@ public: u32 array_pitch; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -1910,8 +1899,7 @@ public: Mode mode; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -1921,8 +1909,7 @@ public: u32 limit; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -1932,8 +1919,7 @@ public: u32 limit; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -1981,8 +1967,7 @@ public: u32 address_low; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -2027,8 +2012,7 @@ public: u32 address_low; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -2224,19 +2208,16 @@ public: } GPUVAddr StartAddress() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_addr_high) << 32) | - start_addr_low); + return (GPUVAddr{start_addr_high} << 32) | GPUVAddr{start_addr_low}; } GPUVAddr EndAddress() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_addr_high) << 32) | - limit_addr_low); + return (GPUVAddr{limit_addr_high} << 32) | GPUVAddr{limit_addr_low}; } /// Adjust the index buffer offset so it points to the first desired index. GPUVAddr IndexStart() const { - return StartAddress() + - static_cast<size_t>(first) * static_cast<size_t>(FormatSizeInBytes()); + return StartAddress() + size_t{first} * size_t{FormatSizeInBytes()}; } }; @@ -2464,8 +2445,7 @@ public: } query; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -2479,8 +2459,7 @@ public: u32 frequency; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } bool IsEnabled() const { @@ -2494,8 +2473,7 @@ public: u32 address_low; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; static_assert(sizeof(VertexStreamLimit) == 0x8); @@ -2543,8 +2521,7 @@ public: std::array<u32, NumCBData> buffer; GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); + return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low}; } }; @@ -3182,6 +3159,7 @@ private: std::vector<u32> deferred_draw_method; enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; DrawMode draw_mode{DrawMode::General}; + bool draw_indexed{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 334429514..a189e60ae 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -41,8 +41,8 @@ void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (size_t i = 0; i < amount; ++i) { - CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); + for (u32 i = 0; i < amount; ++i) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } } @@ -94,14 +94,14 @@ void MaxwellDMA::Launch() { reinterpret_cast<u8*>(tmp_buffer.data()), regs.line_length_in * sizeof(u32)); } else { - auto convert_linear_2_blocklinear_addr = [](u64 address) { + const auto convert_linear_2_blocklinear_addr = [](u64 address) { return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | ((address & 0x180) >> 1) | ((address & 0x20) << 3); }; - auto src_kind = memory_manager.GetPageKind(regs.offset_in); - auto dst_kind = memory_manager.GetPageKind(regs.offset_out); - const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind)); - const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind)); + const auto src_kind = memory_manager.GetPageKind(regs.offset_in); + const auto dst_kind = memory_manager.GetPageKind(regs.offset_out); + const bool is_src_pitch = IsPitchKind(src_kind); + const bool is_dst_pitch = IsPitchKind(dst_kind); if (!is_src_pitch && is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index c308ba3fc..7718a09b3 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -31,7 +31,7 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, method_call.argument); const auto engine_id = static_cast<EngineID>(method_call.argument); - bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id); + bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); @@ -285,12 +285,12 @@ void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, if (ExecuteMethodOnEngine(method)) { CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); } else { - for (std::size_t i = 0; i < amount; i++) { + for (u32 i = 0; i < amount; i++) { CallPullerMethod(MethodCall{ method, base_start[i], subchannel, - methods_pending - static_cast<u32>(i), + methods_pending - i, }); } } diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1bd477011..164a5252a 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -125,7 +125,7 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); if (block) { - state.cv.wait(lk, thread.get_stop_token(), [this, fence] { + Common::CondvarWait(state.cv, lk, thread.get_stop_token(), [this, fence] { return fence <= state.signaled_fence.load(std::memory_order_relaxed); }); } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 64628d3e3..c71a419c7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -10,6 +10,7 @@ #include <thread> #include <variant> +#include "common/polyfill_thread.h" #include "common/threadsafe_queue.h" #include "video_core/framebuffer_config.h" diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp index a44fc83d3..8f23ce527 100644 --- a/src/video_core/host1x/syncpoint_manager.cpp +++ b/src/video_core/host1x/syncpoint_manager.cpp @@ -34,7 +34,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction( } void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage, - ActionHandle& handle) { + const ActionHandle& handle) { std::unique_lock lk(guard); // We want to ensure the iterator still exists prior to erasing it @@ -49,11 +49,11 @@ void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_stor } } -void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) { +void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, const ActionHandle& handle) { DeregisterAction(guest_action_storage[syncpoint_id], handle); } -void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) { +void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, const ActionHandle& handle) { DeregisterAction(host_action_storage[syncpoint_id], handle); } diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h index 50a264e23..847ed20c8 100644 --- a/src/video_core/host1x/syncpoint_manager.h +++ b/src/video_core/host1x/syncpoint_manager.h @@ -36,21 +36,19 @@ public: template <typename Func> ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) { - std::function<void()> func(action); return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id], - expected_value, std::move(func)); + expected_value, std::move(action)); } template <typename Func> ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) { - std::function<void()> func(action); return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id], - expected_value, std::move(func)); + expected_value, std::move(action)); } - void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle); + void DeregisterGuestAction(u32 syncpoint_id, const ActionHandle& handle); - void DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle); + void DeregisterHostAction(u32 syncpoint_id, const ActionHandle& handle); void IncrementGuest(u32 syncpoint_id); @@ -76,7 +74,7 @@ private: std::list<RegisteredAction>& action_storage, u32 expected_value, std::function<void()>&& action); - void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle); + void DeregisterAction(std::list<RegisteredAction>& action_storage, const ActionHandle& handle); void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value); diff --git a/src/video_core/precompiled_headers.h b/src/video_core/precompiled_headers.h new file mode 100644 index 000000000..aabae730b --- /dev/null +++ b/src/video_core/precompiled_headers.h @@ -0,0 +1,6 @@ +// SPDX-FileCopyrightText: 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_precompiled_headers.h" diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index cfd872a40..b6907463c 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,8 +6,8 @@ #include <functional> #include <optional> #include <span> -#include <stop_token> #include "common/common_types.h" +#include "common/polyfill_thread.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1663e277d..e2e3dac34 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -14,6 +14,7 @@ #include "common/literals.h" #include "common/logging/log.h" +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3fe04a115..a38060100 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -39,6 +39,7 @@ using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::GLSL::EmitGLSL; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::ConvertLegacyToGeneric; +using Shader::Maxwell::GenerateGeometryPassthrough; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -56,6 +57,17 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } +Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return Shader::OutputTopology::PointList; + case Maxwell::PrimitiveTopology::LineStrip: + return Shader::OutputTopology::LineStrip; + default: + return Shader::OutputTopology::TriangleStrip; + } +} + Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program, @@ -220,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_int64 = device.HasShaderInt64(), .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, + .support_viewport_index_layer = device.HasVertexViewportLayer(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); @@ -314,9 +327,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { const auto& regs{maxwell3d->regs}; graphics_key.raw = 0; graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0); - graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 - ? regs.draw.topology.Value() - : Maxwell::PrimitiveTopology{}); + graphics_key.gs_input_topology.Assign(regs.draw.topology.Value()); graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value()); graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value()); graphics_key.tessellation_clockwise.Assign( @@ -415,7 +426,19 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; const bool uses_vertex_a{key.unique_hashes[0] != 0}; const bool uses_vertex_b{key.unique_hashes[1] != 0}; + + // Layer passthrough generation for devices without GL_ARB_shader_viewport_layer_array + Shader::IR::Program* layer_source_program{}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + const bool is_emulated_stage = layer_source_program != nullptr && + index == static_cast<u32>(Maxwell::ShaderType::Geometry); + if (key.unique_hashes[index] == 0 && is_emulated_stage) { + auto topology = MaxwellToOutputTopology(key.gs_input_topology); + programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, + *layer_source_program, topology); + continue; + } if (key.unique_hashes[index] == 0) { continue; } @@ -443,6 +466,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors); programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } + + if (programs[index].info.requires_layer_emulation) { + layer_source_program = &programs[index]; + } } const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; @@ -456,7 +483,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == 0) { + const bool is_emulated_stage = layer_source_program != nullptr && + index == static_cast<u32>(Maxwell::ShaderType::Geometry); + if (key.unique_hashes[index] == 0 && !is_emulated_stage) { continue; } UNIMPLEMENTED_IF(index == 0); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 89f181fe3..53ffea904 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -4,7 +4,6 @@ #pragma once #include <filesystem> -#include <stop_token> #include <unordered_map> #include "common/common_types.h" diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 98cc26679..f3f08b42c 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -7,6 +7,7 @@ #include "common/bit_cast.h" #include "common/cityhash.h" #include "common/common_types.h" +#include "common/polyfill_ranges.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 89426121f..6e5abade4 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -10,6 +10,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "core/core.h" #include "core/frontend/emu_window.h" diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index c7196b64e..b5ae6443c 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -7,6 +7,7 @@ #include <vector> #include "common/common_types.h" +#include "common/polyfill_ranges.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 362ed579a..689f02ea5 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -7,6 +7,7 @@ #include <thread> #include "common/common_types.h" +#include "common/polyfill_thread.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d4b0a542a..29da442fa 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -46,6 +46,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::ConvertLegacyToGeneric; +using Shader::Maxwell::GenerateGeometryPassthrough; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -53,13 +54,24 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 7; +constexpr u32 CACHE_VERSION = 8; template <typename Container> auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } +Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return Shader::OutputTopology::PointList; + case Maxwell::PrimitiveTopology::LineStrip: + return Shader::OutputTopology::LineStrip; + default: + return Shader::OutputTopology::TriangleStrip; + } +} + Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never_D3D: @@ -277,7 +289,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ - .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, + .supported_spirv = device.SupportedSpirvVersion(), .unified_descriptor_binding = true, .support_descriptor_aliasing = true, .support_int8 = device.IsInt8Supported(), @@ -327,6 +339,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, .support_snorm_render_buffer = true, + .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), }; } @@ -509,7 +522,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; const bool uses_vertex_a{key.unique_hashes[0] != 0}; const bool uses_vertex_b{key.unique_hashes[1] != 0}; + + // Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer + Shader::IR::Program* layer_source_program{}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + const bool is_emulated_stage = layer_source_program != nullptr && + index == static_cast<u32>(Maxwell::ShaderType::Geometry); + if (key.unique_hashes[index] == 0 && is_emulated_stage) { + auto topology = MaxwellToOutputTopology(key.state.topology); + programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, + *layer_source_program, topology); + continue; + } if (key.unique_hashes[index] == 0) { continue; } @@ -530,6 +555,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } + + if (programs[index].info.requires_layer_emulation) { + layer_source_program = &programs[index]; + } } std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; @@ -538,7 +567,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( Shader::Backend::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == 0) { + const bool is_emulated_stage = layer_source_program != nullptr && + index == static_cast<u32>(Maxwell::ShaderType::Geometry); + if (key.unique_hashes[index] == 0 && !is_emulated_stage) { continue; } UNIMPLEMENTED_IF(index == 0); diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index dc21b7e69..91ad4bf57 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -12,7 +12,7 @@ namespace Vulkan { struct RenderPassKey { - auto operator<=>(const RenderPassKey&) const noexcept = default; + bool operator==(const RenderPassKey&) const noexcept = default; std::array<VideoCore::Surface::PixelFormat, 8> color_formats; VideoCore::Surface::PixelFormat depth_format; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 4a7b633b7..c09fb3e98 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -145,7 +145,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) { if (work_queue.empty()) { wait_cv.notify_all(); } - work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); }); + Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); }); if (stop_token.stop_requested()) { continue; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 929216749..3858c506c 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -12,6 +12,7 @@ #include "common/alignment.h" #include "common/common_types.h" +#include "common/polyfill_thread.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 706d9ba74..d7be417f5 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -7,6 +7,7 @@ #include <vector> #include "common/logging/log.h" +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "core/core.h" #include "video_core/renderer_vulkan/vk_scheduler.h" diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index a4391202d..f3cc4c70b 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -12,6 +12,7 @@ #include <vector> #include "common/common_types.h" +#include "common/polyfill_ranges.h" #include "video_core/control/channel_state_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/shader_environment.h" diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index f24f320b6..958810747 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -15,6 +15,7 @@ #include "common/fs/fs.h" #include "common/fs/path_util.h" #include "common/logging/log.h" +#include "common/polyfill_ranges.h" #include "shader_recompiler/environment.h" #include "video_core/engines/kepler_compute.h" #include "video_core/memory_manager.h" diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index bb55b029f..1342fab1e 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -10,12 +10,12 @@ #include <memory> #include <optional> #include <span> -#include <stop_token> #include <type_traits> #include <unordered_map> #include <vector> #include "common/common_types.h" +#include "common/polyfill_thread.h" #include "common/unique_function.h" #include "shader_recompiler/environment.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index b618e1a25..1a76d4178 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -214,23 +214,16 @@ PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format) } SurfaceType GetFormatType(PixelFormat pixel_format) { - if (static_cast<std::size_t>(pixel_format) < - static_cast<std::size_t>(PixelFormat::MaxColorFormat)) { + if (pixel_format < PixelFormat::MaxColorFormat) { return SurfaceType::ColorTexture; } - - if (static_cast<std::size_t>(pixel_format) < - static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) { + if (pixel_format < PixelFormat::MaxDepthFormat) { return SurfaceType::Depth; } - - if (static_cast<std::size_t>(pixel_format) < - static_cast<std::size_t>(PixelFormat::MaxStencilFormat)) { + if (pixel_format < PixelFormat::MaxStencilFormat) { return SurfaceType::Stencil; } - - if (static_cast<std::size_t>(pixel_format) < - static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { + if (pixel_format < PixelFormat::MaxDepthStencilFormat) { return SurfaceType::DepthStencil; } diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index ee4f2d406..418890126 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <string> +#include "common/polyfill_ranges.h" #include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h index 1efbd6507..0829d773a 100644 --- a/src/video_core/texture_cache/render_targets.h +++ b/src/video_core/texture_cache/render_targets.h @@ -13,7 +13,7 @@ namespace VideoCommon { /// Framebuffer properties used to lookup a framebuffer struct RenderTargets { - constexpr auto operator<=>(const RenderTargets&) const noexcept = default; + constexpr bool operator==(const RenderTargets&) const noexcept = default; constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept { const auto contains = [elements](ImageViewId item) { diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 46e8a86e6..1e2aad76a 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/polyfill_ranges.h" namespace VideoCommon { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 9db7195bf..587339a31 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -16,6 +16,7 @@ #include "common/hash.h" #include "common/literals.h" #include "common/lru_cache.h" +#include "common/polyfill_ranges.h" #include "video_core/compatible_formats.h" #include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" @@ -60,8 +61,6 @@ public: TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept; TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete; TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete; - TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default; - TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default; DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 69a32819a..e8d7c7863 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -15,6 +15,7 @@ #include "common/alignment.h" #include "common/common_types.h" +#include "common/polyfill_ranges.h" #include "common/thread_worker.h" #include "video_core/textures/astc.h" diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index 45071185a..155599316 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -7,6 +7,7 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/polyfill_ranges.h" #include "shader_recompiler/shader_info.h" #include "video_core/transform_feedback.h" diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index ddecfca13..652329c38 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/literals.h" +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "video_core/vulkan_common/nsight_aftermath_tracker.h" #include "video_core/vulkan_common/vulkan_device.h" @@ -74,23 +75,14 @@ enum class NvidiaArchitecture { }; constexpr std::array REQUIRED_EXTENSIONS{ - VK_KHR_MAINTENANCE1_EXTENSION_NAME, - VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, - VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, - VK_KHR_16BIT_STORAGE_EXTENSION_NAME, - VK_KHR_8BIT_STORAGE_EXTENSION_NAME, - VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, - VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, - VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, - VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, - VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, + + // Core in 1.2, but required due to use of extension methods, + // and well-supported by drivers + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, + VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, - VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, #ifdef _WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, #endif @@ -99,6 +91,17 @@ constexpr std::array REQUIRED_EXTENSIONS{ #endif }; +constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_2{ + VK_KHR_8BIT_STORAGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, +}; + +constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_3{ + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, +}; + template <typename T> void SetNext(void**& next, T& data) { *next = &data; @@ -327,7 +330,8 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - supported_extensions{GetSupportedExtensions(physical)}, + instance_version{properties.apiVersion}, supported_extensions{GetSupportedExtensions( + physical)}, format_properties(GetFormatProperties(physical)) { CheckSuitability(surface != nullptr); SetupFamilies(surface); @@ -451,8 +455,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, variable_pointers); - VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, + VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES, .pNext = nullptr, .shaderDemoteToHelperInvocation = true, }; @@ -896,28 +900,51 @@ std::string Device::GetDriverName() const { } } +static std::vector<const char*> ExtensionsRequiredForInstanceVersion(u32 available_version) { + std::vector<const char*> extensions{REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()}; + + if (available_version < VK_API_VERSION_1_2) { + extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_2.begin(), + REQUIRED_EXTENSIONS_BEFORE_1_2.end()); + } + + if (available_version < VK_API_VERSION_1_3) { + extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_3.begin(), + REQUIRED_EXTENSIONS_BEFORE_1_3.end()); + } + + return extensions; +} + void Device::CheckSuitability(bool requires_swapchain) const { - std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; - bool has_swapchain = false; - for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { - const std::string_view name{property.extensionName}; - for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { - if (available_extensions[i]) { - continue; - } - available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; - } - has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME; + std::vector<const char*> required_extensions = + ExtensionsRequiredForInstanceVersion(instance_version); + std::vector<const char*> available_extensions; + + if (requires_swapchain) { + required_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); } - for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { - if (available_extensions[i]) { - continue; + + auto extension_properties = physical.EnumerateDeviceExtensionProperties(); + + for (const VkExtensionProperties& property : extension_properties) { + available_extensions.push_back(property.extensionName); + } + + bool has_all_required_extensions = true; + for (const char* requirement_name : required_extensions) { + const bool found = + std::ranges::any_of(available_extensions, [&](const char* extension_name) { + return std::strcmp(requirement_name, extension_name) == 0; + }); + + if (!found) { + LOG_ERROR(Render_Vulkan, "Missing required extension: {}", requirement_name); + has_all_required_extensions = false; } - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); - throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } - if (requires_swapchain && !has_swapchain) { - LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain"); + + if (!has_all_required_extensions) { throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } @@ -940,9 +967,8 @@ void Device::CheckSuitability(bool requires_swapchain) const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } - VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; - demote.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{}; + demote.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES; demote.pNext = nullptr; VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; @@ -960,7 +986,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { physical.GetFeatures2KHR(features2); const VkPhysicalDeviceFeatures& features{features2.features}; - const std::array feature_report{ + std::vector feature_report{ std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.imageCubeArray, "imageCubeArray"), @@ -983,27 +1009,30 @@ void Device::CheckSuitability(bool requires_swapchain) const { "shaderStorageImageWriteWithoutFormat"), std::make_pair(features.shaderClipDistance, "shaderClipDistance"), std::make_pair(features.shaderCullDistance, "shaderCullDistance"), - std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(variable_pointers.variablePointers, "variablePointers"), std::make_pair(variable_pointers.variablePointersStorageBuffer, "variablePointersStorageBuffer"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), + std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), }; + + bool has_all_required_features = true; for (const auto& [is_supported, name] : feature_report) { - if (is_supported) { - continue; + if (!is_supported) { + LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); + has_all_required_features = false; } - LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); + } + + if (!has_all_required_features) { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } std::vector<const char*> Device::LoadExtensions(bool requires_surface) { - std::vector<const char*> extensions; - extensions.reserve(8 + REQUIRED_EXTENSIONS.size()); - extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); + std::vector<const char*> extensions = ExtensionsRequiredForInstanceVersion(instance_version); if (requires_surface) { extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d7cc6c593..c85fbba77 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -211,11 +211,6 @@ public: return khr_uniform_buffer_standard_layout; } - /// Returns true if the device supports VK_KHR_spirv_1_4. - bool IsKhrSpirv1_4Supported() const { - return khr_spirv_1_4; - } - /// Returns true if the device supports VK_KHR_push_descriptor. bool IsKhrPushDescriptorSupported() const { return khr_push_descriptor; @@ -316,6 +311,17 @@ public: return ext_shader_atomic_int64; } + /// Returns the minimum supported version of SPIR-V. + u32 SupportedSpirvVersion() const { + if (instance_version >= VK_API_VERSION_1_3) { + return 0x00010600U; + } + if (khr_spirv_1_4) { + return 0x00010400U; + } + return 0x00010000U; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index a082e3059..562039b56 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -9,18 +9,21 @@ #include "common/common_types.h" #include "common/dynamic_library.h" #include "common/logging/log.h" +#include "common/polyfill_ranges.h" #include "core/frontend/emu_window.h" #include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_wrapper.h" // Include these late to avoid polluting previous headers -#ifdef _WIN32 +#if defined(_WIN32) #include <windows.h> // ensure include order #include <vulkan/vulkan_win32.h> -#endif - -#if !defined(_WIN32) && !defined(__APPLE__) +#elif defined(__APPLE__) +#include <vulkan/vulkan_macos.h> +#elif defined(__ANDROID__) +#include <vulkan/vulkan_android.h> +#else #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -39,8 +42,15 @@ namespace { case Core::Frontend::WindowSystemType::Windows: extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); break; -#endif -#if !defined(_WIN32) && !defined(__APPLE__) +#elif defined(__APPLE__) + case Core::Frontend::WindowSystemType::Cocoa: + extensions.push_back(VK_MVK_MACOS_SURFACE_EXTENSION_NAME); + break; +#elif defined(__ANDROID__) + case Core::Frontend::WindowSystemType::Android: + extensions.push_back(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME); + break; +#else case Core::Frontend::WindowSystemType::X11: extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); break; @@ -59,6 +69,10 @@ namespace { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + +#ifdef __APPLE__ + extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); +#endif return extensions; } @@ -140,7 +154,7 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD } vk::Instance instance = std::async([&] { - return vk::Instance::Create(required_version, layers, extensions, dld); + return vk::Instance::Create(available_version, layers, extensions, dld); }).get(); if (!vk::Load(*instance, dld)) { LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 6442898bd..1732866e0 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "common/polyfill_ranges.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp index 69f9c494b..fa9bafa20 100644 --- a/src/video_core/vulkan_common/vulkan_surface.cpp +++ b/src/video_core/vulkan_common/vulkan_surface.cpp @@ -11,9 +11,11 @@ #include <windows.h> // ensure include order #include <vulkan/vulkan_win32.h> -#endif - -#if !defined(_WIN32) && !defined(__APPLE__) +#elif defined(__APPLE__) +#include <vulkan/vulkan_macos.h> +#elif defined(__ANDROID__) +#include <vulkan/vulkan_android.h> +#else #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -40,8 +42,33 @@ vk::SurfaceKHR CreateSurface(const vk::Instance& instance, throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } } -#endif -#if !defined(_WIN32) && !defined(__APPLE__) +#elif defined(__APPLE__) + if (window_info.type == Core::Frontend::WindowSystemType::Cocoa) { + const VkMacOSSurfaceCreateInfoMVK mvk_ci{VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK, + nullptr, 0, window_info.render_surface}; + const auto vkCreateMacOSSurfaceMVK = reinterpret_cast<PFN_vkCreateMacOSSurfaceMVK>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateMacOSSurfaceMVK")); + if (!vkCreateMacOSSurfaceMVK || + vkCreateMacOSSurfaceMVK(*instance, &mvk_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Metal surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } +#elif defined(__ANDROID__) + if (window_info.type == Core::Frontend::WindowSystemType::Android) { + const VkAndroidSurfaceCreateInfoKHR android_ci{ + VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, nullptr, 0, + reinterpret_cast<ANativeWindow*>(window_info.render_surface)}; + const auto vkCreateAndroidSurfaceKHR = reinterpret_cast<PFN_vkCreateAndroidSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateAndroidSurfaceKHR")); + if (!vkCreateAndroidSurfaceKHR || + vkCreateAndroidSurfaceKHR(*instance, &android_ci, nullptr, &unsafe_surface) != + VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Android surface"); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + } +#else if (window_info.type == Core::Frontend::WindowSystemType::X11) { const VkXlibSurfaceCreateInfoKHR xlib_ci{ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, @@ -70,6 +97,7 @@ vk::SurfaceKHR CreateSurface(const vk::Instance& instance, } } #endif + if (!unsafe_surface) { LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |