diff options
Diffstat (limited to 'src/video_core')
133 files changed, 5348 insertions, 4526 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 21c46a567..d85f1e9d1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(host_shaders) + add_library(video_core STATIC buffer_cache/buffer_block.h buffer_cache/buffer_cache.h @@ -98,6 +100,8 @@ add_library(video_core STATIC sampler_cache.cpp sampler_cache.h shader_cache.h + shader_notify.cpp + shader_notify.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp @@ -128,6 +132,8 @@ add_library(video_core STATIC shader/decode/other.cpp shader/ast.cpp shader/ast.h + shader/async_shaders.cpp + shader/async_shaders.h shader/compiler_settings.cpp shader/compiler_settings.h shader/control_flow.cpp @@ -240,6 +246,9 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad xbyak) +add_dependencies(video_core host_shaders) +target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) + if (ENABLE_VULKAN) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index dd7ce8c99..e7edd733f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -51,46 +51,43 @@ public: bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - auto& memory_manager = system.GPU().MemoryManager(); - const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); - if (!cpu_addr_opt) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetEmptyBuffer(size); } - const VAddr cpu_addr = *cpu_addr_opt; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. constexpr std::size_t max_stream_size = 0x800; if (use_fast_cbuf || size < max_stream_size) { - if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { - const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); + if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) { + const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size); if (use_fast_cbuf) { u8* dest; if (is_granular) { - dest = memory_manager.GetPointer(gpu_addr); + dest = gpu_memory.GetPointer(gpu_addr); } else { staging_buffer.resize(size); dest = staging_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); + gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); } return ConstBufferUpload(dest, size); } if (is_granular) { - u8* const host_ptr = memory_manager.GetPointer(gpu_addr); + u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { std::memcpy(dest, host_ptr, size); }); } else { - return StreamBufferUpload( - size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { - memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); - }); + return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) { + gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); + }); } } } - Buffer* const block = GetBlock(cpu_addr, size); - MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); + Buffer* const block = GetBlock(*cpu_addr, size); + MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size); if (!map) { return GetEmptyBuffer(size); } @@ -106,7 +103,7 @@ public: } } - return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; + return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -262,9 +259,11 @@ public: virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; protected: - explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - std::unique_ptr<StreamBuffer> stream_buffer) - : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} + explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + std::unique_ptr<StreamBuffer> stream_buffer_) + : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, + stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} ~BufferCache() = default; @@ -326,14 +325,13 @@ private: MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { - auto& memory_manager = system.GPU().MemoryManager(); const VAddr cpu_addr_end = cpu_addr + size; - if (memory_manager.IsGranularRange(gpu_addr, size)) { - u8* host_ptr = memory_manager.GetPointer(gpu_addr); + if (gpu_memory.IsGranularRange(gpu_addr, size)) { + u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); block->Upload(block->Offset(cpu_addr), size, host_ptr); } else { staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); } return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); @@ -392,7 +390,7 @@ private: continue; } staging_buffer.resize(size); - system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); + cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); } } @@ -431,7 +429,7 @@ private: const std::size_t size = map->end - map->start; staging_buffer.resize(size); block->Download(block->Offset(map->start), size, staging_buffer.data()); - system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); + cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -524,11 +522,8 @@ private: void MarkRegionAsWritten(VAddr start, VAddr end) { const u64 page_end = end >> WRITE_PAGE_BIT; for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { - auto it = written_pages.find(page_start); - if (it != written_pages.end()) { - it->second = it->second + 1; - } else { - written_pages.insert_or_assign(page_start, 1); + if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) { + ++it->second; } } } @@ -539,7 +534,7 @@ private: auto it = written_pages.find(page_start); if (it != written_pages.end()) { if (it->second > 1) { - it->second = it->second - 1; + --it->second; } else { written_pages.erase(it); } @@ -570,7 +565,8 @@ private: } VideoCore::RasterizerInterface& rasterizer; - Core::System& system; + Tegra::MemoryManager& gpu_memory; + Core::Memory::Memory& cpu_memory; std::unique_ptr<StreamBuffer> stream_buffer; BufferType stream_buffer_handle; diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index 6c426b035..b06c32c84 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -17,101 +17,94 @@ namespace { // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt constexpr std::array VIEW_CLASS_128_BITS = { - PixelFormat::RGBA32F, - PixelFormat::RGBA32UI, + PixelFormat::R32G32B32A32_FLOAT, + PixelFormat::R32G32B32A32_UINT, + PixelFormat::R32G32B32A32_SINT, }; -// Missing formats: -// PixelFormat::RGBA32I constexpr std::array VIEW_CLASS_96_BITS = { - PixelFormat::RGB32F, + PixelFormat::R32G32B32_FLOAT, }; // Missing formats: // PixelFormat::RGB32UI, // PixelFormat::RGB32I, constexpr std::array VIEW_CLASS_64_BITS = { - PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, - PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S, + PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, + PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, + PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, + PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_SINT, }; -// Missing formats: -// PixelFormat::RGBA16I -// PixelFormat::RG32I // TODO: How should we handle 48 bits? constexpr std::array VIEW_CLASS_32_BITS = { - PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F, - PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI, - PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U, - PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S, - PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8, - PixelFormat::BGRA8_SRGB, + PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, + PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, + PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, + PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM, + PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM, + PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT, + PixelFormat::A2B10G10R10_UINT, }; -// Missing formats: -// PixelFormat::RGBA8UI -// PixelFormat::RGBA8I -// PixelFormat::RGB10_A2_UI // TODO: How should we handle 24 bits? constexpr std::array VIEW_CLASS_16_BITS = { - PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I, - PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S, + PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, + PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, + PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, }; -// Missing formats: -// PixelFormat::RG8I constexpr std::array VIEW_CLASS_8_BITS = { - PixelFormat::R8UI, - PixelFormat::R8U, + PixelFormat::R8_UINT, + PixelFormat::R8_UNORM, + PixelFormat::R8_SINT, + PixelFormat::R8_SNORM, }; -// Missing formats: -// PixelFormat::R8I -// PixelFormat::R8S constexpr std::array VIEW_CLASS_RGTC1_RED = { - PixelFormat::DXN1, + PixelFormat::BC4_UNORM, + PixelFormat::BC4_SNORM, }; -// Missing formats: -// COMPRESSED_SIGNED_RED_RGTC1 constexpr std::array VIEW_CLASS_RGTC2_RG = { - PixelFormat::DXN2UNORM, - PixelFormat::DXN2SNORM, + PixelFormat::BC5_UNORM, + PixelFormat::BC5_SNORM, }; constexpr std::array VIEW_CLASS_BPTC_UNORM = { - PixelFormat::BC7U, - PixelFormat::BC7U_SRGB, + PixelFormat::BC7_UNORM, + PixelFormat::BC7_SRGB, }; constexpr std::array VIEW_CLASS_BPTC_FLOAT = { - PixelFormat::BC6H_SF16, - PixelFormat::BC6H_UF16, + PixelFormat::BC6H_SFLOAT, + PixelFormat::BC6H_UFLOAT, }; // Compatibility table taken from Table 4.X.1 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt constexpr std::array COPY_CLASS_128_BITS = { - PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23, - PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB, - PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB, - PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16, + PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, + PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, + PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, + PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, PixelFormat::BC6H_SFLOAT, + PixelFormat::BC6H_UFLOAT, }; // Missing formats: // PixelFormat::RGBA32I // COMPRESSED_RG_RGTC2 constexpr std::array COPY_CLASS_64_BITS = { - PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, - PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1, - + PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, + PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, + PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, + PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_SINT, + PixelFormat::BC1_RGBA_UNORM, PixelFormat::BC1_RGBA_SRGB, }; // Missing formats: -// PixelFormat::RGBA16I -// PixelFormat::RG32I, // COMPRESSED_RGB_S3TC_DXT1_EXT // COMPRESSED_SRGB_S3TC_DXT1_EXT // COMPRESSED_RGBA_S3TC_DXT1_EXT diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index d1082566d..51766349b 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <array> #include <bitset> #include <cstddef> diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ff10ff40d..6e50661a3 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,13 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} +Fermi2D::Fermi2D() = default; + +Fermi2D::~Fermi2D() = default; + +void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, @@ -87,7 +93,7 @@ void Fermi2D::HandleSurfaceCopy() { copy_config.src_rect = src_rect; copy_config.dst_rect = dst_rect; - if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { + if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 8f37d053f..213abfaae 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -34,8 +34,11 @@ namespace Tegra::Engines { class Fermi2D final : public EngineInterface { public: - explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); - ~Fermi2D() = default; + explicit Fermi2D(); + ~Fermi2D(); + + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); /// Write the value to the register identified by method. void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; @@ -149,7 +152,7 @@ public: }; private: - VideoCore::RasterizerInterface& rasterizer; + VideoCore::RasterizerInterface* rasterizer; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a82b06a38..898370739 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -16,14 +16,15 @@ namespace Tegra::Engines { -KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ - memory_manager, - regs.upload} {} +KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {} KeplerCompute::~KeplerCompute() = default; +void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid KeplerCompute register, increase the size of the Regs structure"); @@ -104,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { } VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } void KeplerCompute::ProcessLaunch() { @@ -119,7 +120,7 @@ void KeplerCompute::ProcessLaunch() { const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - rasterizer.DispatchCompute(code_addr); + rasterizer->DispatchCompute(code_addr); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index b7f668d88..7f2500aab 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -42,10 +42,12 @@ namespace Tegra::Engines { class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { public: - explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); + explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); + static constexpr std::size_t NumConstBuffers = 8; struct Regs { @@ -230,11 +232,6 @@ public: const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; private: - Core::System& system; - VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; - Upload::State upload_state; - void ProcessLaunch(); /// Retrieves information about a specific TIC entry from the TIC buffer. @@ -242,6 +239,11 @@ private: /// Retrieves information about a specific TSC entry from the TSC buffer. Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; + + Core::System& system; + MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; + Upload::State upload_state; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c01436295..33854445f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -22,14 +22,19 @@ using VideoCore::QueryType; /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; -Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, - macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { +Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, + upload_state{memory_manager, regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); } +Maxwell3D::~Maxwell3D() = default; + +void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + void Maxwell3D::InitializeRegisterDefaults() { // Initializes registers to their default values - what games expect them to be at boot. This is // for certain registers that may not be explicitly set by games. @@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { switch (method) { case MAXWELL3D_REG_INDEX(wait_for_idle): { - rasterizer.WaitForIdle(); + rasterizer->WaitForIdle(); break; } case MAXWELL3D_REG_INDEX(shadow_ram_control): { @@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() { const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { - rasterizer.Draw(is_indexed, true); + rasterizer->Draw(is_indexed, true); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() { switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: if (regs.query.query_get.fence == 1) { - rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); + rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); } else { StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); } @@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() { void Maxwell3D::ProcessCounterReset() { switch (regs.counter_reset) { case Regs::CounterReset::SampleCnt: - rasterizer.ResetCounter(QueryType::SamplesPassed); + rasterizer->ResetCounter(QueryType::SamplesPassed); break; default: LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", @@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() { const u32 increment = regs.sync_info.increment.Value(); [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); if (increment) { - rasterizer.SignalSyncPoint(sync_point); + rasterizer->SignalSyncPoint(sync_point); } } @@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() { const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; if (ShouldExecute()) { - rasterizer.Draw(is_indexed, false); + rasterizer->Draw(is_indexed, false); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -590,8 +595,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, - system.GPU().GetTicks()); + rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); return {}; default: LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", @@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() { regs.clear_buffers.R == regs.clear_buffers.B && regs.clear_buffers.R == regs.clear_buffers.A); - rasterizer.Clear(); + rasterizer->Clear(); } u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { @@ -752,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { } VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ef1618990..bc289c55d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -51,9 +51,11 @@ namespace Tegra::Engines { class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { public: - explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); - ~Maxwell3D() = default; + explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); + ~Maxwell3D(); + + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. @@ -647,7 +649,7 @@ public: GetX() + GetWidth(), // right GetY() // bottom }; - }; + } f32 GetX() const { return std::max(0.0f, translate_x - std::fabs(scale_x)); @@ -1418,12 +1420,12 @@ public: return execute_on; } - VideoCore::RasterizerInterface& GetRasterizer() { - return rasterizer; + VideoCore::RasterizerInterface& Rasterizer() { + return *rasterizer; } - const VideoCore::RasterizerInterface& GetRasterizer() const { - return rasterizer; + const VideoCore::RasterizerInterface& Rasterizer() const { + return *rasterizer; } /// Notify a memory write has happened. @@ -1460,11 +1462,10 @@ private: void InitializeRegisterDefaults(); Core::System& system; - - VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; + /// Start offsets of each macro in macro_memory std::array<u32, 0x80> macro_positions = {}; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 01d7df405..e88290754 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -14,50 +14,45 @@ namespace Tegra::Engines { +using namespace Texture; + MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) : system{system}, memory_manager{memory_manager} {} void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid MaxwellDMA register, increase the size of the Regs structure"); + ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); regs.reg_array[method] = method_argument; -#define MAXWELLDMA_REG_INDEX(field_name) \ - (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) - - switch (method) { - case MAXWELLDMA_REG_INDEX(exec): { - HandleCopy(); - break; - } + if (method == offsetof(Regs, launch_dma) / sizeof(u32)) { + Launch(); } - -#undef MAXWELLDMA_REG_INDEX } void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { + for (size_t i = 0; i < amount; ++i) { CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); } } -void MaxwellDMA::HandleCopy() { - LOG_TRACE(HW_GPU, "Requested a DMA copy"); - - const GPUVAddr source = regs.src_address.Address(); - const GPUVAddr dest = regs.dst_address.Address(); +void MaxwellDMA::Launch() { + LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in), + static_cast<GPUVAddr>(regs.offset_out)); // TODO(Subv): Perform more research and implement all features of this engine. - ASSERT(regs.exec.enable_swizzle == 0); - ASSERT(regs.exec.query_mode == Regs::QueryMode::None); - ASSERT(regs.exec.query_intr == Regs::QueryIntr::None); - ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2); - ASSERT(regs.dst_params.pos_x == 0); - ASSERT(regs.dst_params.pos_y == 0); - - if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) { + const LaunchDMA& launch = regs.launch_dma; + ASSERT(launch.remap_enable == 0); + ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); + ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); + ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); + ASSERT(regs.dst_params.origin.x == 0); + ASSERT(regs.dst_params.origin.y == 0); + + const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; + const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; + + if (!is_src_pitch && !is_dst_pitch) { // If both the source and the destination are in block layout, assert. UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented"); return; @@ -66,144 +61,156 @@ void MaxwellDMA::HandleCopy() { // All copies here update the main memory, so mark all rasterizer states as invalid. system.GPU().Maxwell3D().OnMemoryWrite(); - if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { - // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D - // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, - // y_count). - if (!regs.exec.enable_2d) { - memory_manager.CopyBlock(dest, source, regs.x_count); - return; - } + if (is_src_pitch && is_dst_pitch) { + CopyPitchToPitch(); + } else { + ASSERT(launch.multi_line_enable == 1); - // If both the source and the destination are in linear layout, perform a line-by-line - // copy. We're going to take a subrect of size (x_count, y_count) from the source - // rectangle. There is no need to manually flush/invalidate the regions because - // CopyBlock does that for us. - for (u32 line = 0; line < regs.y_count; ++line) { - const GPUVAddr source_line = source + line * regs.src_pitch; - const GPUVAddr dest_line = dest + line * regs.dst_pitch; - memory_manager.CopyBlock(dest_line, source_line, regs.x_count); + if (!is_src_pitch && is_dst_pitch) { + CopyBlockLinearToPitch(); + } else { + CopyPitchToBlockLinear(); } - return; } +} - ASSERT(regs.exec.enable_2d == 1); - - if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { - - ASSERT(regs.src_params.BlockDepth() == 0); - // Optimized path for micro copies. - if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) { - const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; - const std::size_t src_size = Texture::GetGOBSize(); - const std::size_t dst_size = regs.dst_pitch * regs.y_count; - u32 pos_x = regs.src_params.pos_x; - u32 pos_y = regs.src_params.pos_y; - const u64 offset = - Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y, - regs.src_params.BlockDepth(), bytes_per_pixel); - const u32 x_in_gob = 64 / bytes_per_pixel; - pos_x = pos_x % x_in_gob; - pos_y = pos_y % 8; - - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } - - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } - - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size); - memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); - } - - Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, - regs.src_params.size_x, bytes_per_pixel, read_buffer.data(), - write_buffer.data(), regs.src_params.BlockHeight(), pos_x, - pos_y); - - memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); - - return; - } - // If the input is tiled and the output is linear, deswizzle the input and copy it over. - const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; - const std::size_t src_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, - regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); - - const std::size_t src_layer_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1, - regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); +void MaxwellDMA::CopyPitchToPitch() { + // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D + // buffer of length `line_length_in`. + // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). + if (!regs.launch_dma.multi_line_enable) { + memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in); + return; + } - const std::size_t dst_size = regs.dst_pitch * regs.y_count; + // Perform a line-by-line copy. + // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. + // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. + for (u32 line = 0; line < regs.line_count; ++line) { + const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in; + const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out; + memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); + } +} - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } +void MaxwellDMA::CopyBlockLinearToPitch() { + UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); + UNIMPLEMENTED_IF(regs.src_params.layer != 0); - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } + // Optimized path for micro copies. + const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + FastCopyBlockLinearToPitch(); + return; + } - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(source, read_buffer.data(), src_size); - memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); - } + // Deswizzle the input and copy it over. + const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; + const Parameters& src_params = regs.src_params; + const u32 width = src_params.width; + const u32 height = src_params.height; + const u32 depth = src_params.depth; + const u32 block_height = src_params.block_size.height; + const u32 block_depth = src_params.block_size.depth; + const size_t src_size = + CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); + const size_t src_layer_size = + CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); + + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - Texture::UnswizzleSubrect( - regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, - read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(), - regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); - memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); - } else { - ASSERT(regs.dst_params.BlockDepth() == 0); + UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, + block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(), + read_buffer.data()); - const u32 bytes_per_pixel = regs.src_pitch / regs.x_count; + memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); +} - const std::size_t dst_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, - regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); +void MaxwellDMA::CopyPitchToBlockLinear() { + const auto& dst_params = regs.dst_params; + const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; + const u32 width = dst_params.width; + const u32 height = dst_params.height; + const u32 depth = dst_params.depth; + const u32 block_height = dst_params.block_size.height; + const u32 block_depth = dst_params.block_size.depth; + const size_t dst_size = + CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); + const size_t dst_layer_size = + CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); + + const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; + + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - const std::size_t dst_layer_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, - regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); + if (Settings::IsGPULevelExtreme()) { + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + } else { + memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); + } - const std::size_t src_size = regs.src_pitch * regs.y_count; + // If the input is linear and the output is tiled, swizzle the input and copy it over. + if (regs.dst_params.block_size.depth > 0) { + ASSERT(dst_params.layer == 0); + SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height, + bytes_per_pixel, block_height, block_depth, dst_params.origin.x, + dst_params.origin.y, write_buffer.data(), read_buffer.data()); + } else { + SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel, + write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(), + block_height, dst_params.origin.x, dst_params.origin.y); + } - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } + memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); +} - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } +void MaxwellDMA::FastCopyBlockLinearToPitch() { + const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; + const size_t src_size = GOB_SIZE; + const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; + u32 pos_x = regs.src_params.origin.x; + u32 pos_y = regs.src_params.origin.y; + const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y, + regs.src_params.block_size.height, bytes_per_pixel); + const u32 x_in_gob = 64 / bytes_per_pixel; + pos_x = pos_x % x_in_gob; + pos_y = pos_y % 8; + + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(source, read_buffer.data(), src_size); - memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); - } + if (Settings::IsGPULevelExtreme()) { + memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + } else { + memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); + } - // If the input is linear and the output is tiled, swizzle the input and copy it over. - Texture::SwizzleSubrect( - regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel, - write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(), - regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y); + UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, + bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y, + write_buffer.data(), read_buffer.data()); - memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); - } + memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 502dd8509..50f445efc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -24,160 +24,190 @@ class MemoryManager; namespace Tegra::Engines { /** - * This Engine is known as GK104_Copy. Documentation can be found in: + * This engine is known as gk104_copy. Documentation can be found in: + * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml */ class MaxwellDMA final : public EngineInterface { public: - explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); - ~MaxwellDMA() = default; - - /// Write the value to the register identified by method. - void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; - - /// Write multiple values to the register identified by method. - void CallMultiMethod(u32 method, const u32* base_start, u32 amount, - u32 methods_pending) override; + struct PackedGPUVAddr { + u32 upper; + u32 lower; + + constexpr operator GPUVAddr() const noexcept { + return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower; + } + }; + + union BlockSize { + BitField<0, 4, u32> width; + BitField<4, 4, u32> height; + BitField<8, 4, u32> depth; + BitField<12, 4, u32> gob_height; + }; + static_assert(sizeof(BlockSize) == 4); + + union Origin { + BitField<0, 16, u32> x; + BitField<16, 16, u32> y; + }; + static_assert(sizeof(Origin) == 4); + + struct Parameters { + BlockSize block_size; + u32 width; + u32 height; + u32 depth; + u32 layer; + Origin origin; + }; + static_assert(sizeof(Parameters) == 24); + + struct Semaphore { + PackedGPUVAddr address; + u32 payload; + }; + static_assert(sizeof(Semaphore) == 12); + + struct RenderEnable { + enum class Mode : u32 { + FALSE = 0, + TRUE = 1, + CONDITIONAL = 2, + RENDER_IF_EQUAL = 3, + RENDER_IF_NOT_EQUAL = 4, + }; - struct Regs { - static constexpr std::size_t NUM_REGS = 0x1D6; + PackedGPUVAddr address; + BitField<0, 3, Mode> mode; + }; + static_assert(sizeof(RenderEnable) == 12); + + enum class PhysModeTarget : u32 { + LOCAL_FB = 0, + COHERENT_SYSMEM = 1, + NONCOHERENT_SYSMEM = 2, + }; + using PhysMode = BitField<0, 2, PhysModeTarget>; + + union LaunchDMA { + enum class DataTransferType : u32 { + NONE = 0, + PIPELINED = 1, + NON_PIPELINED = 2, + }; - struct Parameters { - union { - BitField<0, 4, u32> block_depth; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_width; - }; - u32 size_x; - u32 size_y; - u32 size_z; - u32 pos_z; - union { - BitField<0, 16, u32> pos_x; - BitField<16, 16, u32> pos_y; - }; + enum class SemaphoreType : u32 { + NONE = 0, + RELEASE_ONE_WORD_SEMAPHORE = 1, + RELEASE_FOUR_WORD_SEMAPHORE = 2, + }; - u32 BlockHeight() const { - return block_height.Value(); - } + enum class InterruptType : u32 { + NONE = 0, + BLOCKING = 1, + NON_BLOCKING = 2, + }; - u32 BlockDepth() const { - return block_depth.Value(); - } + enum class MemoryLayout : u32 { + BLOCKLINEAR = 0, + PITCH = 1, }; - static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); + enum class Type : u32 { + VIRTUAL = 0, + PHYSICAL = 1, + }; - enum class ComponentMode : u32 { - Src0 = 0, - Src1 = 1, - Src2 = 2, - Src3 = 3, - Const0 = 4, - Const1 = 5, - Zero = 6, + enum class SemaphoreReduction : u32 { + IMIN = 0, + IMAX = 1, + IXOR = 2, + IAND = 3, + IOR = 4, + IADD = 5, + INC = 6, + DEC = 7, + FADD = 0xA, }; - enum class CopyMode : u32 { - None = 0, - Unk1 = 1, - Unk2 = 2, + enum class SemaphoreReductionSign : u32 { + SIGNED = 0, + UNSIGNED = 1, }; - enum class QueryMode : u32 { - None = 0, - Short = 1, - Long = 2, + enum class BypassL2 : u32 { + USE_PTE_SETTING = 0, + FORCE_VOLATILE = 1, }; - enum class QueryIntr : u32 { - None = 0, - Block = 1, - NonBlock = 2, + BitField<0, 2, DataTransferType> data_transfer_type; + BitField<2, 1, u32> flush_enable; + BitField<3, 2, SemaphoreType> semaphore_type; + BitField<5, 2, InterruptType> interrupt_type; + BitField<7, 1, MemoryLayout> src_memory_layout; + BitField<8, 1, MemoryLayout> dst_memory_layout; + BitField<9, 1, u32> multi_line_enable; + BitField<10, 1, u32> remap_enable; + BitField<11, 1, u32> rmwdisable; + BitField<12, 1, Type> src_type; + BitField<13, 1, Type> dst_type; + BitField<14, 4, SemaphoreReduction> semaphore_reduction; + BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign; + BitField<19, 1, u32> reduction_enable; + BitField<20, 1, BypassL2> bypass_l2; + }; + static_assert(sizeof(LaunchDMA) == 4); + + struct RemapConst { + enum Swizzle : u32 { + SRC_X = 0, + SRC_Y = 1, + SRC_Z = 2, + SRC_W = 3, + CONST_A = 4, + CONST_B = 5, + NO_WRITE = 6, }; - union { - struct { - INSERT_UNION_PADDING_WORDS(0xC0); - - struct { - union { - BitField<0, 2, CopyMode> copy_mode; - BitField<2, 1, u32> flush; - - BitField<3, 2, QueryMode> query_mode; - BitField<5, 2, QueryIntr> query_intr; - - BitField<7, 1, u32> is_src_linear; - BitField<8, 1, u32> is_dst_linear; - - BitField<9, 1, u32> enable_2d; - BitField<10, 1, u32> enable_swizzle; - }; - } exec; - - INSERT_UNION_PADDING_WORDS(0x3F); - - struct { - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); - } - } src_address; - - struct { - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); - } - } dst_address; - - u32 src_pitch; - u32 dst_pitch; - u32 x_count; - u32 y_count; - - INSERT_UNION_PADDING_WORDS(0xB8); - - u32 const0; - u32 const1; - union { - BitField<0, 4, ComponentMode> component0; - BitField<4, 4, ComponentMode> component1; - BitField<8, 4, ComponentMode> component2; - BitField<12, 4, ComponentMode> component3; - BitField<16, 2, u32> component_size; - BitField<20, 3, u32> src_num_components; - BitField<24, 3, u32> dst_num_components; - - u32 SrcBytePerPixel() const { - return src_num_components.Value() * component_size.Value(); - } - u32 DstBytePerPixel() const { - return dst_num_components.Value() * component_size.Value(); - } - } swizzle_config; + PackedGPUVAddr address; - Parameters dst_params; + union { + BitField<0, 3, Swizzle> dst_x; + BitField<4, 3, Swizzle> dst_y; + BitField<8, 3, Swizzle> dst_z; + BitField<12, 3, Swizzle> dst_w; + BitField<16, 2, u32> component_size_minus_one; + BitField<20, 2, u32> num_src_components_minus_one; + BitField<24, 2, u32> num_dst_components_minus_one; + }; + }; + static_assert(sizeof(RemapConst) == 12); - INSERT_UNION_PADDING_WORDS(1); + explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); + ~MaxwellDMA() = default; - Parameters src_params; + /// Write the value to the register identified by method. + void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; - INSERT_UNION_PADDING_WORDS(0x13); - }; - std::array<u32, NUM_REGS> reg_array; - }; - } regs{}; + /// Write multiple values to the register identified by method. + void CallMultiMethod(u32 method, const u32* base_start, u32 amount, + u32 methods_pending) override; private: + /// Performs the copy from the source buffer to the destination buffer as configured in the + /// registers. + void Launch(); + + void CopyPitchToPitch(); + + void CopyBlockLinearToPitch(); + + void CopyPitchToBlockLinear(); + + void FastCopyBlockLinearToPitch(); + Core::System& system; MemoryManager& memory_manager; @@ -185,28 +215,58 @@ private: std::vector<u8> read_buffer; std::vector<u8> write_buffer; - /// Performs the copy from the source buffer to the destination buffer as configured in the - /// registers. - void HandleCopy(); -}; + static constexpr std::size_t NUM_REGS = 0x800; + struct Regs { + union { + struct { + u32 reserved[0x40]; + u32 nop; + u32 reserved01[0xf]; + u32 pm_trigger; + u32 reserved02[0x3f]; + Semaphore semaphore; + u32 reserved03[0x2]; + RenderEnable render_enable; + PhysMode src_phys_mode; + PhysMode dst_phys_mode; + u32 reserved04[0x26]; + LaunchDMA launch_dma; + u32 reserved05[0x3f]; + PackedGPUVAddr offset_in; + PackedGPUVAddr offset_out; + u32 pitch_in; + u32 pitch_out; + u32 line_length_in; + u32 line_count; + u32 reserved06[0xb8]; + RemapConst remap_const; + Parameters dst_params; + u32 reserved07[0x1]; + Parameters src_params; + u32 reserved08[0x275]; + u32 pm_trigger_end; + u32 reserved09[0x3ba]; + }; + std::array<u32, NUM_REGS> reg_array; + }; + } regs{}; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(exec, 0xC0); -ASSERT_REG_POSITION(src_address, 0x100); -ASSERT_REG_POSITION(dst_address, 0x102); -ASSERT_REG_POSITION(src_pitch, 0x104); -ASSERT_REG_POSITION(dst_pitch, 0x105); -ASSERT_REG_POSITION(x_count, 0x106); -ASSERT_REG_POSITION(y_count, 0x107); -ASSERT_REG_POSITION(const0, 0x1C0); -ASSERT_REG_POSITION(const1, 0x1C1); -ASSERT_REG_POSITION(swizzle_config, 0x1C2); -ASSERT_REG_POSITION(dst_params, 0x1C3); -ASSERT_REG_POSITION(src_params, 0x1CA); + ASSERT_REG_POSITION(launch_dma, 0xC0); + ASSERT_REG_POSITION(offset_in, 0x100); + ASSERT_REG_POSITION(offset_out, 0x102); + ASSERT_REG_POSITION(pitch_in, 0x104); + ASSERT_REG_POSITION(pitch_out, 0x105); + ASSERT_REG_POSITION(line_length_in, 0x106); + ASSERT_REG_POSITION(line_count, 0x107); + ASSERT_REG_POSITION(remap_const, 0x1C0); + ASSERT_REG_POSITION(dst_params, 0x1C3); + ASSERT_REG_POSITION(src_params, 0x1CA); #undef ASSERT_REG_POSITION +}; } // namespace Tegra::Engines diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 8b2a6a42c..de6991ef6 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -5,15 +5,10 @@ #pragma once #include <algorithm> -#include <array> -#include <memory> #include <queue> -#include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/memory.h" -#include "core/settings.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -79,8 +74,6 @@ public: } void WaitPendingFences() { - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; while (!fences.empty()) { TFence& current_fence = fences.front(); if (ShouldWait()) { @@ -88,8 +81,8 @@ public: } PopAsyncFlushes(); if (current_fence->IsSemaphore()) { - memory_manager.template Write<u32>(current_fence->GetAddress(), - current_fence->GetPayload()); + gpu_memory.template Write<u32>(current_fence->GetAddress(), + current_fence->GetPayload()); } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } @@ -98,13 +91,13 @@ public: } protected: - FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - TTextureCache& texture_cache, TTBufferCache& buffer_cache, - TQueryCache& query_cache) - : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, - buffer_cache{buffer_cache}, query_cache{query_cache} {} + explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, + TQueryCache& query_cache_) + : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} - virtual ~FenceManager() {} + virtual ~FenceManager() = default; /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is /// true @@ -118,16 +111,15 @@ protected: /// Waits until a fence has been signalled by the host GPU. virtual void WaitFence(TFence& fence) = 0; - Core::System& system; VideoCore::RasterizerInterface& rasterizer; + Tegra::GPU& gpu; + Tegra::MemoryManager& gpu_memory; TTextureCache& texture_cache; TTBufferCache& buffer_cache; TQueryCache& query_cache; private: void TryReleasePendingFences() { - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; while (!fences.empty()) { TFence& current_fence = fences.front(); if (ShouldWait() && !IsFenceSignaled(current_fence)) { @@ -135,8 +127,8 @@ private: } PopAsyncFlushes(); if (current_fence->IsSemaphore()) { - memory_manager.template Write<u32>(current_fence->GetAddress(), - current_fence->GetPayload()); + gpu_memory.template Write<u32>(current_fence->GetAddress(), + current_fence->GetPayload()); } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 758bfe148..4bb9256e9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -20,26 +20,35 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/shader_notify.h" #include "video_core/video_core.h" namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async) - : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { - auto& rasterizer{renderer->Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); - dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this); - maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); - fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); - kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); - maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); - kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); -} +GPU::GPU(Core::System& system_, bool is_async_) + : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, + dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, + maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, + fermi_2d{std::make_unique<Engines::Fermi2D>()}, + kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, + maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, + kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, + shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} GPU::~GPU() = default; +void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { + renderer = std::move(renderer_); + + VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer(); + memory_manager->BindRasterizer(rasterizer); + maxwell_3d->BindRasterizer(rasterizer); + fermi_2d->BindRasterizer(rasterizer); + kepler_compute->BindRasterizer(rasterizer); +} + Engines::Maxwell3D& GPU::Maxwell3D() { return *maxwell_3d; } @@ -79,7 +88,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) { } MICROPROFILE_SCOPE(GPU_wait); std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; }); + sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; }); } void GPU::IncrementSyncPoint(const u32 syncpoint_id) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2c42483bd..2d15d1c6f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -33,59 +33,68 @@ class System; namespace VideoCore { class RendererBase; +class ShaderNotify; } // namespace VideoCore namespace Tegra { enum class RenderTargetFormat : u32 { NONE = 0x0, - RGBA32_FLOAT = 0xC0, - RGBA32_UINT = 0xC2, - RGBA16_UNORM = 0xC6, - RGBA16_SNORM = 0xC7, - RGBA16_UINT = 0xC9, - RGBA16_FLOAT = 0xCA, - RG32_FLOAT = 0xCB, - RG32_UINT = 0xCD, - RGBX16_FLOAT = 0xCE, - BGRA8_UNORM = 0xCF, - BGRA8_SRGB = 0xD0, - RGB10_A2_UNORM = 0xD1, - RGBA8_UNORM = 0xD5, - RGBA8_SRGB = 0xD6, - RGBA8_SNORM = 0xD7, - RGBA8_UINT = 0xD9, - RG16_UNORM = 0xDA, - RG16_SNORM = 0xDB, - RG16_SINT = 0xDC, - RG16_UINT = 0xDD, - RG16_FLOAT = 0xDE, - R11G11B10_FLOAT = 0xE0, + R32B32G32A32_FLOAT = 0xC0, + R32G32B32A32_SINT = 0xC1, + R32G32B32A32_UINT = 0xC2, + R16G16B16A16_UNORM = 0xC6, + R16G16B16A16_SNORM = 0xC7, + R16G16B16A16_SINT = 0xC8, + R16G16B16A16_UINT = 0xC9, + R16G16B16A16_FLOAT = 0xCA, + R32G32_FLOAT = 0xCB, + R32G32_SINT = 0xCC, + R32G32_UINT = 0xCD, + R16G16B16X16_FLOAT = 0xCE, + B8G8R8A8_UNORM = 0xCF, + B8G8R8A8_SRGB = 0xD0, + A2B10G10R10_UNORM = 0xD1, + A2B10G10R10_UINT = 0xD2, + A8B8G8R8_UNORM = 0xD5, + A8B8G8R8_SRGB = 0xD6, + A8B8G8R8_SNORM = 0xD7, + A8B8G8R8_SINT = 0xD8, + A8B8G8R8_UINT = 0xD9, + R16G16_UNORM = 0xDA, + R16G16_SNORM = 0xDB, + R16G16_SINT = 0xDC, + R16G16_UINT = 0xDD, + R16G16_FLOAT = 0xDE, + B10G11R11_FLOAT = 0xE0, R32_SINT = 0xE3, R32_UINT = 0xE4, R32_FLOAT = 0xE5, - B5G6R5_UNORM = 0xE8, - BGR5A1_UNORM = 0xE9, - RG8_UNORM = 0xEA, - RG8_SNORM = 0xEB, - RG8_UINT = 0xED, + R5G6B5_UNORM = 0xE8, + A1R5G5B5_UNORM = 0xE9, + R8G8_UNORM = 0xEA, + R8G8_SNORM = 0xEB, + R8G8_SINT = 0xEC, + R8G8_UINT = 0xED, R16_UNORM = 0xEE, R16_SNORM = 0xEF, R16_SINT = 0xF0, R16_UINT = 0xF1, R16_FLOAT = 0xF2, R8_UNORM = 0xF3, + R8_SNORM = 0xF4, + R8_SINT = 0xF5, R8_UINT = 0xF6, }; enum class DepthFormat : u32 { - Z32_FLOAT = 0xA, - Z16_UNORM = 0x13, - S8_Z24_UNORM = 0x14, - Z24_X8_UNORM = 0x15, - Z24_S8_UNORM = 0x16, - Z24_C8_UNORM = 0x18, - Z32_S8_X24_FLOAT = 0x19, + D32_FLOAT = 0xA, + D16_UNORM = 0x13, + S8_UINT_Z24_UNORM = 0x14, + D24X8_UNORM = 0x15, + D24S8_UNORM = 0x16, + D24C8_UNORM = 0x18, + D32_FLOAT_S8X24_UINT = 0x19, }; struct CommandListHeader; @@ -96,9 +105,9 @@ class DebugContext; */ struct FramebufferConfig { enum class PixelFormat : u32 { - ABGR8 = 1, - RGB565 = 4, - BGRA8 = 5, + A8B8G8R8_UNORM = 1, + RGB565_UNORM = 4, + B8G8R8A8_UNORM = 5, }; VAddr address; @@ -133,11 +142,6 @@ class MemoryManager; class GPU { public: - explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - bool is_async); - - virtual ~GPU(); - struct MethodCall { u32 method{}; u32 argument{}; @@ -153,6 +157,12 @@ public: method_count(method_count) {} }; + explicit GPU(Core::System& system, bool is_async); + virtual ~GPU(); + + /// Binds a renderer to the GPU. + void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); + /// Calls a GPU method. void CallMethod(const MethodCall& method_call); @@ -207,6 +217,14 @@ public: return *renderer; } + VideoCore::ShaderNotify& ShaderNotify() { + return *shader_notify; + } + + const VideoCore::ShaderNotify& ShaderNotify() const { + return *shader_notify; + } + // Waits for the GPU to finish working virtual void WaitIdle() const = 0; @@ -235,7 +253,7 @@ public: const Tegra::DmaPusher& DmaPusher() const; struct Regs { - static constexpr size_t NUM_REGS = 0x100; + static constexpr size_t NUM_REGS = 0x40; union { struct { @@ -254,7 +272,7 @@ public: u32 semaphore_trigger; INSERT_UNION_PADDING_WORDS(0xC); - // The puser and the puller share the reference counter, the pusher only has read + // The pusher and the puller share the reference counter, the pusher only has read // access u32 reference_count; INSERT_UNION_PADDING_WORDS(0x5); @@ -328,13 +346,12 @@ private: bool ExecuteMethodOnEngine(u32 method); protected: - std::unique_ptr<Tegra::DmaPusher> dma_pusher; Core::System& system; + std::unique_ptr<Tegra::MemoryManager> memory_manager; + std::unique_ptr<Tegra::DmaPusher> dma_pusher; std::unique_ptr<VideoCore::RendererBase> renderer; private: - std::unique_ptr<Tegra::MemoryManager> memory_manager; - /// Mapping of command subchannels to their bound engine ids std::array<EngineID, 8> bound_engines = {}; /// 3D engine @@ -347,6 +364,8 @@ private: std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine std::unique_ptr<Engines::KeplerMemory> kepler_memory; + /// Shader build notifier + std::unique_ptr<VideoCore::ShaderNotify> shader_notify; std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 7b855f63e..70a3d5738 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,16 +10,14 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer_), true), gpu_thread{system}, - cpu_context(renderer->GetRenderWindow().CreateSharedContext()), - gpu_context(std::move(context)) {} +GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); + gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); + cpu_context = renderer->GetRenderWindow().CreateSharedContext(); + cpu_context->MakeCurrent(); } void GPUAsynch::ObtainContext() { diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 15e9f1d38..f89c855a5 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -20,8 +20,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context); + explicit GPUAsynch(Core::System& system); ~GPUAsynch() override; void Start() override; @@ -42,7 +41,6 @@ protected: private: GPUThread::ThreadManager gpu_thread; std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; - std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index aaeb9811d..1ca47ddef 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,20 +7,18 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer), false), context{std::move(context)} {} +GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {} GPUSynch::~GPUSynch() = default; void GPUSynch::Start() {} void GPUSynch::ObtainContext() { - context->MakeCurrent(); + renderer->Context().MakeCurrent(); } void GPUSynch::ReleaseContext() { - context->DoneCurrent(); + renderer->Context().DoneCurrent(); } void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 762c20aa5..297258cb1 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -19,8 +19,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context); + explicit GPUSynch(Core::System& system); ~GPUSynch() override; void Start() override; @@ -36,9 +35,6 @@ public: protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, [[maybe_unused]] u32 value) const override {} - -private: - std::unique_ptr<Core::Frontend::GraphicsContext> context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 738c6f0c1..bf761abf2 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -44,9 +44,9 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, dma_pusher.DispatchCalls(); } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); - } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { + } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { renderer.Rasterizer().ReleaseFences(); - } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) { + } else if (std::holds_alternative<GPUTickCommand>(next.data)) { system.GPU().TickWork(); } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { renderer.Rasterizer().FlushRegion(data->addr, data->size); diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt new file mode 100644 index 000000000..aa62363a7 --- /dev/null +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -0,0 +1,43 @@ +set(SHADER_FILES + opengl_present.frag + opengl_present.vert +) + +set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) +set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) + +set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) +add_custom_command( + OUTPUT + ${SHADER_DIR} + COMMAND + ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR} +) + +set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) +set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) + +foreach(FILENAME IN ITEMS ${SHADER_FILES}) + string(REPLACE "." "_" SHADER_NAME ${FILENAME}) + set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) + set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) + add_custom_command( + OUTPUT + ${HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${HEADER_GENERATOR} + ${INPUT_FILE} + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) +endforeach() + +add_custom_target(host_shaders + DEPENDS + ${SHADER_HEADERS} + SOURCES + ${SHADER_FILES} +) diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake new file mode 100644 index 000000000..368bce0ed --- /dev/null +++ b/src/video_core/host_shaders/StringShaderHeader.cmake @@ -0,0 +1,11 @@ +set(SOURCE_FILE ${CMAKE_ARGV3}) +set(HEADER_FILE ${CMAKE_ARGV4}) +set(INPUT_FILE ${CMAKE_ARGV5}) + +get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME) +string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME}) +string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME) + +file(READ ${SOURCE_FILE} CONTENTS) + +configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY) diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag new file mode 100644 index 000000000..8a4cb024b --- /dev/null +++ b/src/video_core/host_shaders/opengl_present.frag @@ -0,0 +1,10 @@ +#version 430 core + +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (binding = 0) uniform sampler2D color_texture; + +void main() { + color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert new file mode 100644 index 000000000..2235d31a4 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present.vert @@ -0,0 +1,24 @@ +#version 430 core + +out gl_PerVertex { + vec4 gl_Position; +}; + +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; +layout (location = 0) out vec2 frag_tex_coord; + +// This is a truncated 3x3 matrix for 2D transformations: +// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. +// The third column performs translation. +// The third row could be used for projection, which we don't need in 2D. It hence is assumed to +// implicitly be [0, 0, 1] +layout (location = 0) uniform mat3x2 modelview_matrix; + +void main() { + // Multiply input position by the rotscale part of the matrix and then manually translate by + // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector + // to `vec3(vert_position.xy, 1.0)` + gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in new file mode 100644 index 000000000..ccdb0d2a9 --- /dev/null +++ b/src/video_core/host_shaders/source_shader.h.in @@ -0,0 +1,9 @@ +#pragma once + +#include <string_view> + +namespace HostShaders { + +constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)"; + +} // namespace HostShaders diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 4d00b84b0..31ee3440a 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -103,8 +103,9 @@ public: virtual ~CachedMacro() = default; /** * Executes the macro code with the specified input parameters. - * @param code The macro byte code to execute + * * @param parameters The parameters of the macro + * @param method The method to execute */ virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0; }; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 410f99018..df00b57df 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -12,13 +12,11 @@ namespace Tegra { namespace { // HLE'd functions -static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, - const std::vector<u32>& parameters) { +void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); maxwell3d.regs.draw.topology.Assign( - static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & - ~(0x3ffffff << 26))); + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff)); maxwell3d.regs.vb_base_instance = parameters[5]; maxwell3d.mme_draw.instance_count = instance_count; maxwell3d.regs.vb_element_base = parameters[3]; @@ -26,15 +24,14 @@ static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, maxwell3d.regs.index_array.first = parameters[4]; if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, true); } maxwell3d.regs.index_array.count = 0; maxwell3d.mme_draw.instance_count = 0; maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, - const std::vector<u32>& parameters) { +void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); maxwell3d.regs.vertex_buffer.first = parameters[3]; @@ -45,15 +42,14 @@ static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, maxwell3d.mme_draw.instance_count = count; if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(false, true); + maxwell3d.Rasterizer().Draw(false, true); } maxwell3d.regs.vertex_buffer.count = 0; maxwell3d.mme_draw.instance_count = 0; maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, - const std::vector<u32>& parameters) { +void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); const u32 element_base = parameters[4]; const u32 base_instance = parameters[5]; @@ -69,7 +65,7 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, true); } maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? maxwell3d.regs.index_array.count = 0; @@ -81,12 +77,12 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.CallMethodFromMME(0x8e5, 0x0); maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -} // namespace +} // Anonymous namespace constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ - std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), - std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), - std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7), + {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, + {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, + {0x0217920100488FF7, &HLE_0217920100488FF7}, }}; HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index aa5256419..bd01fd1f2 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -34,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho this->parameters = std::make_unique<u32[]>(num_parameters); } std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); - this->num_parameters = num_parameters; // Execute the code until we hit an exit condition. bool keep_executing = true; diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 07292702f..954b87515 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,11 +14,11 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -static const Xbyak::Reg64 STATE = Xbyak::util::rbx; -static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; -static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; -static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; +constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; +constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; +constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; +constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, @@ -419,7 +419,6 @@ void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { void MacroJITx64Impl::Compile() { MICROPROFILE_SCOPE(MacroJitCompile); - bool keep_executing = true; labels.fill(Xbyak::Label()); Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index ff5505d12..16b2aaa27 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -4,7 +4,6 @@ #include "common/alignment.h" #include "common/assert.h" -#include "common/logging/log.h" #include "core/core.h" #include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" @@ -15,122 +14,142 @@ namespace Tegra { -MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : rasterizer{rasterizer}, system{system} { - page_table.Resize(address_space_width, page_bits, false); - - // Initialize the map with a single free region covering the entire managed space. - VirtualMemoryArea initial_vma; - initial_vma.size = address_space_end; - vma_map.emplace(initial_vma.base, initial_vma); - - UpdatePageTableForVMA(initial_vma); -} +MemoryManager::MemoryManager(Core::System& system_) + : system{system_}, page_table(page_table_size) {} MemoryManager::~MemoryManager() = default; -GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { - const u64 aligned_size{Common::AlignUp(size, page_size)}; - const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; - - AllocateMemory(gpu_addr, 0, aligned_size); +void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} +GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { + u64 remaining_size{size}; + for (u64 offset{}; offset < size; offset += page_size) { + if (remaining_size < page_size) { + SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size); + } else { + SetPageEntry(gpu_addr + offset, page_entry + offset); + } + remaining_size -= page_size; + } return gpu_addr; } -GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { - const u64 aligned_size{Common::AlignUp(size, page_size)}; - - AllocateMemory(gpu_addr, 0, aligned_size); +GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { + return UpdateRange(gpu_addr, cpu_addr, size); +} - return gpu_addr; +GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { + return Map(cpu_addr, *FindFreeRange(size, align), size); } -GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { - const u64 aligned_size{Common::AlignUp(size, page_size)}; - const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; +void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { + if (!size) { + return; + } - MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); - ASSERT( - system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess()); + // Flush and invalidate through the GPU interface, to be asynchronous if possible. + system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); - return gpu_addr; + UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } -GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { - ASSERT((gpu_addr & page_mask) == 0); +std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) { + for (u64 offset{}; offset < size; offset += page_size) { + if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) { + return {}; + } + } - const u64 aligned_size{Common::AlignUp(size, page_size)}; + return UpdateRange(gpu_addr, PageEntry::State::Allocated, size); +} - MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); - ASSERT( - system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess()); - return gpu_addr; +GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) { + return *AllocateFixed(*FindFreeRange(size, align), size); } -GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { - ASSERT((gpu_addr & page_mask) == 0); +void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) { + if (!page_entry.IsValid()) { + return; + } - const u64 aligned_size{Common::AlignUp(size, page_size)}; - const auto cpu_addr = GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); + ASSERT(system.CurrentProcess() + ->PageTable() + .LockForDeviceAddressSpace(page_entry.ToAddress(), size) + .IsSuccess()); +} - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size); +void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) { + if (!page_entry.IsValid()) { + return; + } - UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() ->PageTable() - .UnlockForDeviceAddressSpace(cpu_addr.value(), size) + .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size) .IsSuccess()); - - return gpu_addr; } -GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const { - // Find the first Free VMA. - const VMAHandle vma_handle{ - std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) { - if (vma.second.type != VirtualMemoryArea::Type::Unmapped) { - return false; - } +PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const { + return page_table[PageEntryIndex(gpu_addr)]; +} - const VAddr vma_end{vma.second.base + vma.second.size}; - return vma_end > region_start && vma_end >= region_start + size; - })}; +void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { + // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to + // improper tracking, but should be fixed in the future. - if (vma_handle == vma_map.end()) { - return {}; - } + //// Unlock the old page + // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size); - return std::max(region_start, vma_handle->second.base); -} + //// Lock the new page + // TryLockPage(page_entry, size); -bool MemoryManager::IsAddressValid(GPUVAddr addr) const { - return (addr >> page_bits) < page_table.pointers.size(); + page_table[PageEntryIndex(gpu_addr)] = page_entry; } -std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const { - if (!IsAddressValid(addr)) { - return {}; +std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const { + if (!align) { + align = page_size; + } else { + align = Common::AlignUp(align, page_size); } - const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]}; - if (cpu_addr) { - return cpu_addr + (addr & page_mask); + u64 available_size{}; + GPUVAddr gpu_addr{address_space_start}; + while (gpu_addr + available_size < address_space_size) { + if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) { + available_size += page_size; + + if (available_size >= size) { + return gpu_addr; + } + } else { + gpu_addr += available_size + page_size; + available_size = 0; + + const auto remainder{gpu_addr % align}; + if (remainder) { + gpu_addr = (gpu_addr - remainder) + align; + } + } } return {}; } -template <typename T> -T MemoryManager::Read(GPUVAddr addr) const { - if (!IsAddressValid(addr)) { +std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { + const auto page_entry{GetPageEntry(gpu_addr)}; + if (!page_entry.IsValid()) { return {}; } - const u8* page_pointer{GetPointer(addr)}; - if (page_pointer) { + return page_entry.ToAddress() + (gpu_addr & page_mask); +} + +template <typename T> +T MemoryManager::Read(GPUVAddr addr) const { + if (auto page_pointer{GetPointer(addr)}; page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block T value; std::memcpy(&value, page_pointer, sizeof(T)); @@ -144,12 +163,7 @@ T MemoryManager::Read(GPUVAddr addr) const { template <typename T> void MemoryManager::Write(GPUVAddr addr, T data) { - if (!IsAddressValid(addr)) { - return; - } - - u8* page_pointer{GetPointer(addr)}; - if (page_pointer) { + if (auto page_pointer{GetPointer(addr)}; page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block std::memcpy(page_pointer, &data, sizeof(T)); return; @@ -167,66 +181,49 @@ template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data); template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data); template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data); -u8* MemoryManager::GetPointer(GPUVAddr addr) { - if (!IsAddressValid(addr)) { +u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { + if (!GetPageEntry(gpu_addr).IsValid()) { return {}; } - auto& memory = system.Memory(); - - const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; - - if (page_addr != 0) { - return memory.GetPointer(page_addr + (addr & page_mask)); + const auto address{GpuToCpuAddress(gpu_addr)}; + if (!address) { + return {}; } - LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); - return {}; + return system.Memory().GetPointer(*address); } -const u8* MemoryManager::GetPointer(GPUVAddr addr) const { - if (!IsAddressValid(addr)) { +const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { + if (!GetPageEntry(gpu_addr).IsValid()) { return {}; } - const auto& memory = system.Memory(); - - const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; - - if (page_addr != 0) { - return memory.GetPointer(page_addr + (addr & page_mask)); + const auto address{GpuToCpuAddress(gpu_addr)}; + if (!address) { + return {}; } - LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); - return {}; -} - -bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { - const std::size_t inner_size = size - 1; - const GPUVAddr end = start + inner_size; - const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); - const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); - const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); - return range == inner_size; + return system.Memory().GetPointer(*address); } -void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, - const std::size_t size) const { +void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const { std::size_t remaining_size{size}; std::size_t page_index{gpu_src_addr >> page_bits}; std::size_t page_offset{gpu_src_addr & page_mask}; - auto& memory = system.Memory(); - while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(src_addr, copy_amount); - memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); + if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { + const auto src_addr{*page_addr + page_offset}; + + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. + rasterizer->FlushRegion(src_addr, copy_amount); + system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); + } page_index++; page_offset = 0; @@ -241,18 +238,17 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t page_index{gpu_src_addr >> page_bits}; std::size_t page_offset{gpu_src_addr & page_mask}; - auto& memory = system.Memory(); - while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - const u8* page_pointer = page_table.pointers[page_index]; - if (page_pointer) { - const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; - memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); + + if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { + const auto src_addr{*page_addr + page_offset}; + system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); } else { std::memset(dest_buffer, 0, copy_amount); } + page_index++; page_offset = 0; dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; @@ -260,23 +256,23 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, } } -void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, - const std::size_t size) { +void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{gpu_dest_addr >> page_bits}; std::size_t page_offset{gpu_dest_addr & page_mask}; - auto& memory = system.Memory(); - while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; - // Invalidate must happen on the rasterizer interface, such that memory is always - // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(dest_addr, copy_amount); - memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); + if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { + const auto dest_addr{*page_addr + page_offset}; + + // Invalidate must happen on the rasterizer interface, such that memory is always + // synchronous when it is written (even when in asynchronous GPU mode). + rasterizer->InvalidateRegion(dest_addr, copy_amount); + system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); + } page_index++; page_offset = 0; @@ -286,21 +282,20 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, } void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, - const std::size_t size) { + std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{gpu_dest_addr >> page_bits}; std::size_t page_offset{gpu_dest_addr & page_mask}; - auto& memory = system.Memory(); - while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - u8* page_pointer = page_table.pointers[page_index]; - if (page_pointer) { - const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; - memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); + + if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { + const auto dest_addr{*page_addr + page_offset}; + system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); } + page_index++; page_offset = 0; src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; @@ -308,273 +303,26 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf } } -void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, - const std::size_t size) { +void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { std::vector<u8> tmp_buffer(size); ReadBlock(gpu_src_addr, tmp_buffer.data(), size); WriteBlock(gpu_dest_addr, tmp_buffer.data(), size); } void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, - const std::size_t size) { + std::size_t size) { std::vector<u8> tmp_buffer(size); ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size); WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size); } -bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { - const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; - const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size; - return page <= Core::Memory::PAGE_SIZE; -} - -void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, - VAddr backing_addr) { - LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, - (base + size) * page_size); - - const VAddr end{base + size}; - ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", - base + page_table.pointers.size()); - - if (memory == nullptr) { - while (base != end) { - page_table.pointers[base] = nullptr; - page_table.backing_addr[base] = 0; - - base += 1; - } - } else { - while (base != end) { - page_table.pointers[base] = memory; - page_table.backing_addr[base] = backing_addr; - - base += 1; - memory += page_size; - backing_addr += page_size; - } - } -} - -void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) { - ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size); - ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base); - MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr); -} - -void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) { - ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size); - ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base); - MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped); -} - -bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { - ASSERT(base + size == next.base); - if (type != next.type) { - return {}; - } - if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) { - return {}; - } - if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) { - return {}; - } - return true; -} - -MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const { - if (target >= address_space_end) { - return vma_map.end(); - } else { - return std::prev(vma_map.upper_bound(target)); - } -} - -MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) { - VirtualMemoryArea& vma{vma_handle->second}; - - vma.type = VirtualMemoryArea::Type::Allocated; - vma.backing_addr = 0; - vma.backing_memory = {}; - UpdatePageTableForVMA(vma); - - return MergeAdjacent(vma_handle); -} - -MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset, - u64 size) { - - // This is the appropriately sized VMA that will turn into our allocation. - VMAIter vma_handle{CarveVMA(target, size)}; - VirtualMemoryArea& vma{vma_handle->second}; - - ASSERT(vma.size == size); - - vma.offset = offset; - - return Allocate(vma_handle); -} - -MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size, - VAddr backing_addr) { - // This is the appropriately sized VMA that will turn into our allocation. - VMAIter vma_handle{CarveVMA(target, size)}; - VirtualMemoryArea& vma{vma_handle->second}; - - ASSERT(vma.size == size); - - vma.type = VirtualMemoryArea::Type::Mapped; - vma.backing_memory = memory; - vma.backing_addr = backing_addr; - UpdatePageTableForVMA(vma); - - return MergeAdjacent(vma_handle); -} - -void MemoryManager::UnmapRange(GPUVAddr target, u64 size) { - VMAIter vma{CarveVMARange(target, size)}; - const VAddr target_end{target + size}; - const VMAIter end{vma_map.end()}; - - // The comparison against the end of the range must be done using addresses since VMAs can be - // merged during this process, causing invalidation of the iterators. - while (vma != end && vma->second.base < target_end) { - // Unmapped ranges return to allocated state and can be reused - // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games - vma = std::next(Allocate(vma)); - } - - ASSERT(FindVMA(target)->second.size >= size); -} - -MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) { - // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given - // non-const access to its container. - return vma_map.erase(iter, iter); // Erases an empty range of elements -} - -MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) { - ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size); - ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base); - - VMAIter vma_handle{StripIterConstness(FindVMA(base))}; - if (vma_handle == vma_map.end()) { - // Target address is outside the managed range - return {}; - } - - const VirtualMemoryArea& vma{vma_handle->second}; - if (vma.type == VirtualMemoryArea::Type::Mapped) { - // Region is already allocated - return vma_handle; - } - - const VAddr start_in_vma{base - vma.base}; - const VAddr end_in_vma{start_in_vma + size}; - - ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}", - vma.size, end_in_vma); - - if (end_in_vma < vma.size) { - // Split VMA at the end of the allocated region - SplitVMA(vma_handle, end_in_vma); - } - if (start_in_vma != 0) { - // Split VMA at the start of the allocated region - vma_handle = SplitVMA(vma_handle, start_in_vma); - } - - return vma_handle; -} - -MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) { - ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size); - ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target); - - const VAddr target_end{target + size}; - ASSERT(target_end >= target); - ASSERT(size > 0); - - VMAIter begin_vma{StripIterConstness(FindVMA(target))}; - const VMAIter i_end{vma_map.lower_bound(target_end)}; - if (std::any_of(begin_vma, i_end, [](const auto& entry) { - return entry.second.type == VirtualMemoryArea::Type::Unmapped; - })) { - return {}; - } - - if (target != begin_vma->second.base) { - begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base); - } - - VMAIter end_vma{StripIterConstness(FindVMA(target_end))}; - if (end_vma != vma_map.end() && target_end != end_vma->second.base) { - end_vma = SplitVMA(end_vma, target_end - end_vma->second.base); - } - - return begin_vma; -} - -MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { - VirtualMemoryArea& old_vma{vma_handle->second}; - VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA - - // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably - // a bug. This restriction might be removed later. - ASSERT(offset_in_vma < old_vma.size); - ASSERT(offset_in_vma > 0); - - old_vma.size = offset_in_vma; - new_vma.base += offset_in_vma; - new_vma.size -= offset_in_vma; - - switch (new_vma.type) { - case VirtualMemoryArea::Type::Unmapped: - break; - case VirtualMemoryArea::Type::Allocated: - new_vma.offset += offset_in_vma; - break; - case VirtualMemoryArea::Type::Mapped: - new_vma.backing_memory += offset_in_vma; - break; - } - - ASSERT(old_vma.CanBeMergedWith(new_vma)); - - return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma); -} - -MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) { - const VMAIter next_vma{std::next(iter)}; - if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { - iter->second.size += next_vma->second.size; - vma_map.erase(next_vma); - } - - if (iter != vma_map.begin()) { - VMAIter prev_vma{std::prev(iter)}; - if (prev_vma->second.CanBeMergedWith(iter->second)) { - prev_vma->second.size += iter->second.size; - vma_map.erase(iter); - iter = prev_vma; - } - } - - return iter; -} - -void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { - switch (vma.type) { - case VirtualMemoryArea::Type::Unmapped: - UnmapRegion(vma.base, vma.size); - break; - case VirtualMemoryArea::Type::Allocated: - MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr); - break; - case VirtualMemoryArea::Type::Mapped: - MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr); - break; +bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { + const auto cpu_addr{GpuToCpuAddress(gpu_addr)}; + if (!cpu_addr) { + return false; } + const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size}; + return page <= Core::Memory::PAGE_SIZE; } } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 87658e87a..53c8d122a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -6,9 +6,9 @@ #include <map> #include <optional> +#include <vector> #include "common/common_types.h" -#include "common/page_table.h" namespace VideoCore { class RasterizerInterface; @@ -20,58 +20,70 @@ class System; namespace Tegra { -/** - * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space - * with homogeneous attributes across its extents. In this particular implementation each VMA is - * also backed by a single host memory allocation. - */ -struct VirtualMemoryArea { - enum class Type : u8 { - Unmapped, - Allocated, - Mapped, +class PageEntry final { +public: + enum class State : u32 { + Unmapped = static_cast<u32>(-1), + Allocated = static_cast<u32>(-2), }; - /// Virtual base address of the region. - GPUVAddr base{}; - /// Size of the region. - u64 size{}; - /// Memory area mapping type. - Type type{Type::Unmapped}; - /// CPU memory mapped address corresponding to this memory area. - VAddr backing_addr{}; - /// Offset into the backing_memory the mapping starts from. - std::size_t offset{}; - /// Pointer backing this VMA. - u8* backing_memory{}; - - /// Tests if this area can be merged to the right with `next`. - bool CanBeMergedWith(const VirtualMemoryArea& next) const; + constexpr PageEntry() = default; + constexpr PageEntry(State state) : state{state} {} + constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {} + + [[nodiscard]] constexpr bool IsUnmapped() const { + return state == State::Unmapped; + } + + [[nodiscard]] constexpr bool IsAllocated() const { + return state == State::Allocated; + } + + [[nodiscard]] constexpr bool IsValid() const { + return !IsUnmapped() && !IsAllocated(); + } + + [[nodiscard]] constexpr VAddr ToAddress() const { + if (!IsValid()) { + return {}; + } + + return static_cast<VAddr>(state) << ShiftBits; + } + + [[nodiscard]] constexpr PageEntry operator+(u64 offset) const { + // If this is a reserved value, offsets do not apply + if (!IsValid()) { + return *this; + } + return PageEntry{(static_cast<VAddr>(state) << ShiftBits) + offset}; + } + +private: + static constexpr std::size_t ShiftBits{12}; + + State state{State::Unmapped}; }; +static_assert(sizeof(PageEntry) == 4, "PageEntry is too large"); class MemoryManager final { public: - explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit MemoryManager(Core::System& system); ~MemoryManager(); - GPUVAddr AllocateSpace(u64 size, u64 align); - GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); - GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size); - GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size); - GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size); - std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; + /// Binds a renderer to the memory manager. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); + + [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; template <typename T> - T Read(GPUVAddr addr) const; + [[nodiscard]] T Read(GPUVAddr addr) const; template <typename T> void Write(GPUVAddr addr, T data); - u8* GetPointer(GPUVAddr addr); - const u8* GetPointer(GPUVAddr addr) const; - - /// Returns true if the block is continuous in host memory, false otherwise - bool IsBlockContinuous(GPUVAddr start, std::size_t size) const; + [[nodiscard]] u8* GetPointer(GPUVAddr addr); + [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; /** * ReadBlock and WriteBlock are full read and write operations over virtual @@ -98,92 +110,43 @@ public: void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size); /** - * IsGranularRange checks if a gpu region can be simply read with a pointer + * IsGranularRange checks if a gpu region can be simply read with a pointer. */ - bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); - -private: - using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; - using VMAHandle = VMAMap::const_iterator; - using VMAIter = VMAMap::iterator; - - bool IsAddressValid(GPUVAddr addr) const; - void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, - VAddr backing_addr = 0); - void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr); - void UnmapRegion(GPUVAddr base, u64 size); + [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; - /// Finds the VMA in which the given address is included in, or `vma_map.end()`. - VMAHandle FindVMA(GPUVAddr target) const; - - VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size); - - /** - * Maps an unmanaged host memory pointer at a given address. - * - * @param target The guest address to start the mapping at. - * @param memory The memory to be mapped. - * @param size Size of the mapping in bytes. - * @param backing_addr The base address of the range to back this mapping. - */ - VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr); + [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); + [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); + void Unmap(GPUVAddr gpu_addr, std::size_t size); - /// Unmaps a range of addresses, splitting VMAs as necessary. - void UnmapRange(GPUVAddr target, u64 size); - - /// Converts a VMAHandle to a mutable VMAIter. - VMAIter StripIterConstness(const VMAHandle& iter); - - /// Marks as the specified VMA as allocated. - VMAIter Allocate(VMAIter vma); - - /** - * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing - * the appropriate error checking. - */ - VMAIter CarveVMA(GPUVAddr base, u64 size); - - /** - * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each - * end of the range. - */ - VMAIter CarveVMARange(GPUVAddr base, u64 size); - - /** - * Splits a VMA in two, at the specified offset. - * @returns the right side of the split, with the original iterator becoming the left side. - */ - VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma); - - /** - * Checks for and merges the specified VMA with adjacent ones if possible. - * @returns the merged VMA or the original if no merging was possible. - */ - VMAIter MergeAdjacent(VMAIter vma); +private: + [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; + void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); + GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); + [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; - /// Updates the pages corresponding to this VMA so they match the VMA's attributes. - void UpdatePageTableForVMA(const VirtualMemoryArea& vma); + void TryLockPage(PageEntry page_entry, std::size_t size); + void TryUnlockPage(PageEntry page_entry, std::size_t size); - /// Finds a free (unmapped region) of the specified size starting at the specified address. - GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const; + [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) { + return (gpu_addr >> page_bits) & page_table_mask; + } -private: + static constexpr u64 address_space_size = 1ULL << 40; + static constexpr u64 address_space_start = 1ULL << 32; static constexpr u64 page_bits{16}; static constexpr u64 page_size{1 << page_bits}; static constexpr u64 page_mask{page_size - 1}; + static constexpr u64 page_table_bits{24}; + static constexpr u64 page_table_size{1 << page_table_bits}; + static constexpr u64 page_table_mask{page_table_size - 1}; - /// Address space in bits, according to Tegra X1 TRM - static constexpr u32 address_space_width{40}; - /// Start address for mapping, this is fairly arbitrary but must be non-zero. - static constexpr GPUVAddr address_space_base{0x100000}; - /// End of address space, based on address space in bits. - static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; + Core::System& system; - Common::PageTable page_table; - VMAMap vma_map; - VideoCore::RasterizerInterface& rasterizer; + VideoCore::RasterizerInterface* rasterizer = nullptr; - Core::System& system; + std::vector<PageEntry> page_table; }; } // namespace Tegra diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 836b25c1d..9da9fb4ff 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -41,146 +41,168 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth } static constexpr ConversionArray morton_to_linear_fns = { - MortonCopy<true, PixelFormat::ABGR8U>, - MortonCopy<true, PixelFormat::ABGR8S>, - MortonCopy<true, PixelFormat::ABGR8UI>, - MortonCopy<true, PixelFormat::B5G6R5U>, - MortonCopy<true, PixelFormat::A2B10G10R10U>, - MortonCopy<true, PixelFormat::A1B5G5R5U>, - MortonCopy<true, PixelFormat::R8U>, - MortonCopy<true, PixelFormat::R8UI>, - MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::RGBA16U>, - MortonCopy<true, PixelFormat::RGBA16S>, - MortonCopy<true, PixelFormat::RGBA16UI>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, - MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, - MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, - MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::DXN2UNORM>, - MortonCopy<true, PixelFormat::DXN2SNORM>, - MortonCopy<true, PixelFormat::BC7U>, - MortonCopy<true, PixelFormat::BC6H_UF16>, - MortonCopy<true, PixelFormat::BC6H_SF16>, - MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, - MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, - MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16U>, - MortonCopy<true, PixelFormat::R16S>, - MortonCopy<true, PixelFormat::R16UI>, - MortonCopy<true, PixelFormat::R16I>, - MortonCopy<true, PixelFormat::RG16>, - MortonCopy<true, PixelFormat::RG16F>, - MortonCopy<true, PixelFormat::RG16UI>, - MortonCopy<true, PixelFormat::RG16I>, - MortonCopy<true, PixelFormat::RG16S>, - MortonCopy<true, PixelFormat::RGB32F>, - MortonCopy<true, PixelFormat::RGBA8_SRGB>, - MortonCopy<true, PixelFormat::RG8U>, - MortonCopy<true, PixelFormat::RG8S>, - MortonCopy<true, PixelFormat::RG8UI>, - MortonCopy<true, PixelFormat::RG32UI>, - MortonCopy<true, PixelFormat::RGBX16F>, - MortonCopy<true, PixelFormat::R32UI>, - MortonCopy<true, PixelFormat::R32I>, - MortonCopy<true, PixelFormat::ASTC_2D_8X8>, - MortonCopy<true, PixelFormat::ASTC_2D_8X5>, - MortonCopy<true, PixelFormat::ASTC_2D_5X4>, - MortonCopy<true, PixelFormat::BGRA8_SRGB>, - MortonCopy<true, PixelFormat::DXT1_SRGB>, - MortonCopy<true, PixelFormat::DXT23_SRGB>, - MortonCopy<true, PixelFormat::DXT45_SRGB>, - MortonCopy<true, PixelFormat::BC7U_SRGB>, - MortonCopy<true, PixelFormat::R4G4B4A4U>, + MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>, + MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>, + MortonCopy<true, PixelFormat::A8B8G8R8_SINT>, + MortonCopy<true, PixelFormat::A8B8G8R8_UINT>, + MortonCopy<true, PixelFormat::R5G6B5_UNORM>, + MortonCopy<true, PixelFormat::B5G6R5_UNORM>, + MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>, + MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>, + MortonCopy<true, PixelFormat::A2B10G10R10_UINT>, + MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>, + MortonCopy<true, PixelFormat::R8_UNORM>, + MortonCopy<true, PixelFormat::R8_SNORM>, + MortonCopy<true, PixelFormat::R8_SINT>, + MortonCopy<true, PixelFormat::R8_UINT>, + MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>, + MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>, + MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>, + MortonCopy<true, PixelFormat::R16G16B16A16_SINT>, + MortonCopy<true, PixelFormat::R16G16B16A16_UINT>, + MortonCopy<true, PixelFormat::B10G11R11_FLOAT>, + MortonCopy<true, PixelFormat::R32G32B32A32_UINT>, + MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>, + MortonCopy<true, PixelFormat::BC2_UNORM>, + MortonCopy<true, PixelFormat::BC3_UNORM>, + MortonCopy<true, PixelFormat::BC4_UNORM>, + MortonCopy<true, PixelFormat::BC4_SNORM>, + MortonCopy<true, PixelFormat::BC5_UNORM>, + MortonCopy<true, PixelFormat::BC5_SNORM>, + MortonCopy<true, PixelFormat::BC7_UNORM>, + MortonCopy<true, PixelFormat::BC6H_UFLOAT>, + MortonCopy<true, PixelFormat::BC6H_SFLOAT>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>, + MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>, + MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>, + MortonCopy<true, PixelFormat::R32G32B32A32_SINT>, + MortonCopy<true, PixelFormat::R32G32_FLOAT>, + MortonCopy<true, PixelFormat::R32G32_SINT>, + MortonCopy<true, PixelFormat::R32_FLOAT>, + MortonCopy<true, PixelFormat::R16_FLOAT>, + MortonCopy<true, PixelFormat::R16_UNORM>, + MortonCopy<true, PixelFormat::R16_SNORM>, + MortonCopy<true, PixelFormat::R16_UINT>, + MortonCopy<true, PixelFormat::R16_SINT>, + MortonCopy<true, PixelFormat::R16G16_UNORM>, + MortonCopy<true, PixelFormat::R16G16_FLOAT>, + MortonCopy<true, PixelFormat::R16G16_UINT>, + MortonCopy<true, PixelFormat::R16G16_SINT>, + MortonCopy<true, PixelFormat::R16G16_SNORM>, + MortonCopy<true, PixelFormat::R32G32B32_FLOAT>, + MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>, + MortonCopy<true, PixelFormat::R8G8_UNORM>, + MortonCopy<true, PixelFormat::R8G8_SNORM>, + MortonCopy<true, PixelFormat::R8G8_SINT>, + MortonCopy<true, PixelFormat::R8G8_UINT>, + MortonCopy<true, PixelFormat::R32G32_UINT>, + MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>, + MortonCopy<true, PixelFormat::R32_UINT>, + MortonCopy<true, PixelFormat::R32_SINT>, + MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>, + MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>, + MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>, + MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>, + MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>, + MortonCopy<true, PixelFormat::BC2_SRGB>, + MortonCopy<true, PixelFormat::BC3_SRGB>, + MortonCopy<true, PixelFormat::BC7_SRGB>, + MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_5X5>, + MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X8>, + MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X6>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X10>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_12X12>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_8X6>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X5>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, - MortonCopy<true, PixelFormat::E5B9G9R9F>, - MortonCopy<true, PixelFormat::Z32F>, - MortonCopy<true, PixelFormat::Z16>, - MortonCopy<true, PixelFormat::Z24S8>, - MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32FS8>, + MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>, + MortonCopy<true, PixelFormat::D32_FLOAT>, + MortonCopy<true, PixelFormat::D16_UNORM>, + MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>, + MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>, + MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>, }; static constexpr ConversionArray linear_to_morton_fns = { - MortonCopy<false, PixelFormat::ABGR8U>, - MortonCopy<false, PixelFormat::ABGR8S>, - MortonCopy<false, PixelFormat::ABGR8UI>, - MortonCopy<false, PixelFormat::B5G6R5U>, - MortonCopy<false, PixelFormat::A2B10G10R10U>, - MortonCopy<false, PixelFormat::A1B5G5R5U>, - MortonCopy<false, PixelFormat::R8U>, - MortonCopy<false, PixelFormat::R8UI>, - MortonCopy<false, PixelFormat::RGBA16F>, - MortonCopy<false, PixelFormat::RGBA16S>, - MortonCopy<false, PixelFormat::RGBA16U>, - MortonCopy<false, PixelFormat::RGBA16UI>, - MortonCopy<false, PixelFormat::R11FG11FB10F>, - MortonCopy<false, PixelFormat::RGBA32UI>, - MortonCopy<false, PixelFormat::DXT1>, - MortonCopy<false, PixelFormat::DXT23>, - MortonCopy<false, PixelFormat::DXT45>, - MortonCopy<false, PixelFormat::DXN1>, - MortonCopy<false, PixelFormat::DXN2UNORM>, - MortonCopy<false, PixelFormat::DXN2SNORM>, - MortonCopy<false, PixelFormat::BC7U>, - MortonCopy<false, PixelFormat::BC6H_UF16>, - MortonCopy<false, PixelFormat::BC6H_SF16>, + MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>, + MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>, + MortonCopy<false, PixelFormat::A8B8G8R8_SINT>, + MortonCopy<false, PixelFormat::A8B8G8R8_UINT>, + MortonCopy<false, PixelFormat::R5G6B5_UNORM>, + MortonCopy<false, PixelFormat::B5G6R5_UNORM>, + MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>, + MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>, + MortonCopy<false, PixelFormat::A2B10G10R10_UINT>, + MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>, + MortonCopy<false, PixelFormat::R8_UNORM>, + MortonCopy<false, PixelFormat::R8_SNORM>, + MortonCopy<false, PixelFormat::R8_SINT>, + MortonCopy<false, PixelFormat::R8_UINT>, + MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>, + MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>, + MortonCopy<false, PixelFormat::R16G16B16A16_SINT>, + MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>, + MortonCopy<false, PixelFormat::R16G16B16A16_UINT>, + MortonCopy<false, PixelFormat::B10G11R11_FLOAT>, + MortonCopy<false, PixelFormat::R32G32B32A32_UINT>, + MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>, + MortonCopy<false, PixelFormat::BC2_UNORM>, + MortonCopy<false, PixelFormat::BC3_UNORM>, + MortonCopy<false, PixelFormat::BC4_UNORM>, + MortonCopy<false, PixelFormat::BC4_SNORM>, + MortonCopy<false, PixelFormat::BC5_UNORM>, + MortonCopy<false, PixelFormat::BC5_SNORM>, + MortonCopy<false, PixelFormat::BC7_UNORM>, + MortonCopy<false, PixelFormat::BC6H_UFLOAT>, + MortonCopy<false, PixelFormat::BC6H_SFLOAT>, // TODO(Subv): Swizzling ASTC formats are not supported nullptr, - MortonCopy<false, PixelFormat::BGRA8>, - MortonCopy<false, PixelFormat::RGBA32F>, - MortonCopy<false, PixelFormat::RG32F>, - MortonCopy<false, PixelFormat::R32F>, - MortonCopy<false, PixelFormat::R16F>, - MortonCopy<false, PixelFormat::R16U>, - MortonCopy<false, PixelFormat::R16S>, - MortonCopy<false, PixelFormat::R16UI>, - MortonCopy<false, PixelFormat::R16I>, - MortonCopy<false, PixelFormat::RG16>, - MortonCopy<false, PixelFormat::RG16F>, - MortonCopy<false, PixelFormat::RG16UI>, - MortonCopy<false, PixelFormat::RG16I>, - MortonCopy<false, PixelFormat::RG16S>, - MortonCopy<false, PixelFormat::RGB32F>, - MortonCopy<false, PixelFormat::RGBA8_SRGB>, - MortonCopy<false, PixelFormat::RG8U>, - MortonCopy<false, PixelFormat::RG8S>, - MortonCopy<false, PixelFormat::RG8UI>, - MortonCopy<false, PixelFormat::RG32UI>, - MortonCopy<false, PixelFormat::RGBX16F>, - MortonCopy<false, PixelFormat::R32UI>, - MortonCopy<false, PixelFormat::R32I>, + MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>, + MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>, + MortonCopy<false, PixelFormat::R32G32B32A32_SINT>, + MortonCopy<false, PixelFormat::R32G32_FLOAT>, + MortonCopy<false, PixelFormat::R32G32_SINT>, + MortonCopy<false, PixelFormat::R32_FLOAT>, + MortonCopy<false, PixelFormat::R16_FLOAT>, + MortonCopy<false, PixelFormat::R16_UNORM>, + MortonCopy<false, PixelFormat::R16_SNORM>, + MortonCopy<false, PixelFormat::R16_UINT>, + MortonCopy<false, PixelFormat::R16_SINT>, + MortonCopy<false, PixelFormat::R16G16_UNORM>, + MortonCopy<false, PixelFormat::R16G16_FLOAT>, + MortonCopy<false, PixelFormat::R16G16_UINT>, + MortonCopy<false, PixelFormat::R16G16_SINT>, + MortonCopy<false, PixelFormat::R16G16_SNORM>, + MortonCopy<false, PixelFormat::R32G32B32_FLOAT>, + MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>, + MortonCopy<false, PixelFormat::R8G8_UNORM>, + MortonCopy<false, PixelFormat::R8G8_SNORM>, + MortonCopy<false, PixelFormat::R8G8_SINT>, + MortonCopy<false, PixelFormat::R8G8_UINT>, + MortonCopy<false, PixelFormat::R32G32_UINT>, + MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>, + MortonCopy<false, PixelFormat::R32_UINT>, + MortonCopy<false, PixelFormat::R32_SINT>, nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::BGRA8_SRGB>, - MortonCopy<false, PixelFormat::DXT1_SRGB>, - MortonCopy<false, PixelFormat::DXT23_SRGB>, - MortonCopy<false, PixelFormat::DXT45_SRGB>, - MortonCopy<false, PixelFormat::BC7U_SRGB>, - MortonCopy<false, PixelFormat::R4G4B4A4U>, + MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>, + MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>, + MortonCopy<false, PixelFormat::BC2_SRGB>, + MortonCopy<false, PixelFormat::BC3_SRGB>, + MortonCopy<false, PixelFormat::BC7_SRGB>, + MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>, nullptr, nullptr, nullptr, @@ -199,12 +221,12 @@ static constexpr ConversionArray linear_to_morton_fns = { nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::E5B9G9R9F>, - MortonCopy<false, PixelFormat::Z32F>, - MortonCopy<false, PixelFormat::Z16>, - MortonCopy<false, PixelFormat::Z24S8>, - MortonCopy<false, PixelFormat::S8Z24>, - MortonCopy<false, PixelFormat::Z32FS8>, + MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>, + MortonCopy<false, PixelFormat::D32_FLOAT>, + MortonCopy<false, PixelFormat::D16_UNORM>, + MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>, + MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>, + MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>, }; static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 0d3a88765..d13a66bb6 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -95,10 +95,12 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo class QueryPool> class QueryCacheBase { public: - explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ - static_cast<QueryCache&>(*this), - VideoCore::QueryType::SamplesPassed}}} {} + explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_) + : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this), + VideoCore::QueryType::SamplesPassed}}} {} void InvalidateRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; @@ -118,29 +120,27 @@ public: */ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { std::unique_lock lock{mutex}; - auto& memory_manager = system.GPU().MemoryManager(); - const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr_opt); - VAddr cpu_addr = *cpu_addr_opt; + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); - CachedQuery* query = TryGet(cpu_addr); + CachedQuery* query = TryGet(*cpu_addr); if (!query) { - ASSERT_OR_EXECUTE(cpu_addr_opt, return;); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); - query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); + query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); } query->BindCounter(Stream(type).Current(), timestamp); if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - AsyncFlushQuery(cpu_addr); + AsyncFlushQuery(*cpu_addr); } } /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. void UpdateCounters() { std::unique_lock lock{mutex}; - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); } @@ -270,8 +270,9 @@ private: static constexpr std::uintptr_t PAGE_SIZE = 4096; static constexpr unsigned PAGE_BITS = 12; - Core::System& system; VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; std::recursive_mutex mutex; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 3cbdac8e7..b3e0919f8 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -106,11 +106,8 @@ public: virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} /// Initialize disk cached resources for the game being emulated - virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, - const DiskResourceLoadCallback& callback = {}) {} - - /// Initializes renderer dirty flags - virtual void SetupDirtyFlags() {} + virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, + const DiskResourceLoadCallback& callback) {} /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. GuestDriverProfile& AccessGuestDriverProfile() { diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index dfb06e87e..a93a1732c 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -9,7 +9,9 @@ namespace VideoCore { -RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} { +RendererBase::RendererBase(Core::Frontend::EmuWindow& window_, + std::unique_ptr<Core::Frontend::GraphicsContext> context_) + : render_window{window_}, context{std::move(context_)} { RefreshBaseSettings(); } diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 1d85219b6..649074acd 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -15,7 +15,8 @@ namespace Core::Frontend { class EmuWindow; -} +class GraphicsContext; +} // namespace Core::Frontend namespace VideoCore { @@ -25,14 +26,15 @@ struct RendererSettings { // Screenshot std::atomic<bool> screenshot_requested{false}; - void* screenshot_bits; + void* screenshot_bits{}; std::function<void()> screenshot_complete_callback; Layout::FramebufferLayout screenshot_framebuffer_layout; }; class RendererBase : NonCopyable { public: - explicit RendererBase(Core::Frontend::EmuWindow& window); + explicit RendererBase(Core::Frontend::EmuWindow& window, + std::unique_ptr<Core::Frontend::GraphicsContext> context); virtual ~RendererBase(); /// Initialize the renderer @@ -68,6 +70,14 @@ public: return *rasterizer; } + Core::Frontend::GraphicsContext& Context() { + return *context; + } + + const Core::Frontend::GraphicsContext& Context() const { + return *context; + } + Core::Frontend::EmuWindow& GetRenderWindow() { return render_window; } @@ -94,6 +104,7 @@ public: protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<RasterizerInterface> rasterizer; + std::unique_ptr<Core::Frontend::GraphicsContext> context; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index eb5158407..b7e9ed2e9 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) { return type; } -std::string GlobalMemoryName(const GlobalMemoryBase& base) { - return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); -} - class ARBDecompiler final { public: explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, @@ -199,6 +195,8 @@ public: } private: + void DefineGlobalMemory(); + void DeclareHeader(); void DeclareVertex(); void DeclareGeometry(); @@ -228,6 +226,7 @@ private: std::pair<std::string, std::size_t> BuildCoords(Operation); std::string BuildAoffi(Operation); + std::string GlobalMemoryPointer(const GmemNode& gmem); void Exit(); std::string Assign(Operation); @@ -378,10 +377,8 @@ private: std::string address; std::string_view opname; if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); - opname = "ATOMB"; + address = GlobalMemoryPointer(*gmem); + opname = "ATOM"; } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); opname = "ATOMS"; @@ -456,9 +453,13 @@ private: shader_source += '\n'; } - std::string AllocTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}.x", num_temporaries++); + std::string AllocLongVectorTemporary() { + max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); + return fmt::format("L{}", num_long_temporaries++); + } + + std::string AllocLongTemporary() { + return fmt::format("{}.x", AllocLongVectorTemporary()); } std::string AllocVectorTemporary() { @@ -466,8 +467,13 @@ private: return fmt::format("T{}", num_temporaries++); } + std::string AllocTemporary() { + return fmt::format("{}.x", AllocVectorTemporary()); + } + void ResetTemporaries() noexcept { num_temporaries = 0; + num_long_temporaries = 0; } const Device& device; @@ -478,6 +484,11 @@ private: std::size_t num_temporaries = 0; std::size_t max_temporaries = 0; + std::size_t num_long_temporaries = 0; + std::size_t max_long_temporaries = 0; + + std::map<GlobalMemoryBase, u32> global_memory_names; + std::string shader_source; static constexpr std::string_view ADD_F32 = "ADD.F32"; @@ -784,6 +795,8 @@ private: ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier) : device{device}, ir{ir}, registry{registry}, stage{stage} { + DefineGlobalMemory(); + AddLine("TEMP RC;"); AddLine("TEMP FSWZA[4];"); AddLine("TEMP FSWZB[4];"); @@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) { } } +void ARBDecompiler::DefineGlobalMemory() { + u32 binding = 0; + for (const auto& pair : ir.GetGlobalMemory()) { + const GlobalMemoryBase base = pair.first; + global_memory_names.emplace(base, binding); + ++binding; + } +} + void ARBDecompiler::DeclareHeader() { AddLine("!!NV{}5.0", HeaderStageName(stage)); // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D AddLine("OPTION NV_internal;"); AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_storage_buffer;"); AddLine("OPTION NV_shader_thread_group;"); if (ir.UsesWarps() && device.HasWarpIntrinsics()) { AddLine("OPTION NV_shader_thread_shuffle;"); @@ -892,11 +913,19 @@ void ARBDecompiler::DeclareCompute() { const ComputeInfo& info = registry.GetComputeInfo(); AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - if (info.shared_memory_size_in_words > 0) { - const u32 size_in_bytes = info.shared_memory_size_in_words * 4; - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); + if (info.shared_memory_size_in_words == 0) { + return; + } + const u32 limit = device.GetMaxComputeSharedMemorySize(); + u32 size_in_bytes = info.shared_memory_size_in_words * 4; + if (size_in_bytes > limit) { + LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", + size_in_bytes, limit); + size_in_bytes = limit; } + + AddLine("SHARED_MEMORY {};", size_in_bytes); + AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); } void ARBDecompiler::DeclareInputAttributes() { @@ -951,11 +980,10 @@ void ARBDecompiler::DeclareLocalMemory() { } void ARBDecompiler::DeclareGlobalMemory() { - u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& pair : ir.GetGlobalMemory()) { - const auto& base = pair.first; - AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); - ++binding; + const std::size_t num_entries = ir.GetGlobalMemory().size(); + if (num_entries > 0) { + const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; + AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); } } @@ -977,6 +1005,9 @@ void ARBDecompiler::DeclareTemporaries() { for (std::size_t i = 0; i < max_temporaries; ++i) { AddLine("TEMP T{};", i); } + for (std::size_t i = 0; i < max_long_temporaries; ++i) { + AddLine("LONG TEMP L{};", i); + } } void ARBDecompiler::DeclarePredicates() { @@ -1260,13 +1291,6 @@ std::string ARBDecompiler::Visit(const Node& node) { return "{0, 0, 0, 0}.x"; } - const auto buffer_index = [this, &abuf]() -> std::string { - if (stage != ShaderType::Geometry) { - return ""; - } - return fmt::format("[{}]", Visit(abuf->GetBuffer())); - }; - const Attribute::Index index = abuf->GetIndex(); const u32 element = abuf->GetElement(); const char swizzle = Swizzle(element); @@ -1339,10 +1363,7 @@ std::string ARBDecompiler::Visit(const Node& node) { if (const auto gmem = std::get_if<GmemNode>(&*node)) { std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); return temporary; } @@ -1375,7 +1396,7 @@ std::string ARBDecompiler::Visit(const Node& node) { return {}; } - if (const auto cmt = std::get_if<CommentNode>(&*node)) { + if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { // Uncommenting this will generate invalid code. GLASM lacks comments. // AddLine("// {}", cmt->GetText()); return {}; @@ -1419,6 +1440,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { return fmt::format(", offset({})", temporary); } +std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { + const u32 binding = global_memory_names.at(gmem.GetDescriptor()); + const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; + + const std::string pointer = AllocLongVectorTemporary(); + std::string temporary = AllocTemporary(); + + const u32 local_index = binding / 2; + AddLine("PK64.U {}, c[{}];", pointer, local_index); + AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), + Visit(gmem.GetBaseAddress())); + AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); + AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); + return fmt::format("{}.x", pointer); +} + void ARBDecompiler::Exit() { if (stage != ShaderType::Fragment) { AddLine("RET;"); @@ -1515,11 +1552,7 @@ std::string ARBDecompiler::Assign(Operation operation) { ResetTemporaries(); return {}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); ResetTemporaries(); return {}; } else { @@ -1671,7 +1704,7 @@ std::string ARBDecompiler::HCastFloat(Operation operation) { } std::string ARBDecompiler::HUnpack(Operation operation) { - const std::string operand = Visit(operation[0]); + std::string operand = Visit(operation[0]); switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { case Tegra::Shader::HalfType::H0_H1: return operand; @@ -2021,7 +2054,7 @@ std::string ARBDecompiler::InvocationId(Operation) { std::string ARBDecompiler::YNegate(Operation) { LOG_WARNING(Render_OpenGL, "(STUBBED)"); - const std::string temporary = AllocTemporary(); + std::string temporary = AllocTemporary(); AddLine("MOV.F {}, 1;", temporary); return temporary; } @@ -2044,10 +2077,6 @@ std::string ARBDecompiler::ShuffleIndexed(Operation operation) { } std::string ARBDecompiler::Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); - return {}; - } AddLine("BAR;"); return {}; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e461e4c70..b1c4cd62f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } @@ -59,9 +59,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); } -OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, +OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const Device& device_, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, + : GenericBufferCache{rasterizer, gpu_memory, cpu_memory, std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, device{device_} { if (!device.HasFastBufferSubData()) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 88fdc0536..f75b32e31 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -52,7 +52,8 @@ private: using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; class OGLBufferCache final : public GenericBufferCache { public: - explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, + explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const Device& device, std::size_t stream_size); ~OGLBufferCache(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c1f20f0ab..e7d95149f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -212,6 +212,7 @@ Device::Device() shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); + max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -233,6 +234,8 @@ Device::Device() GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); + LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); @@ -248,6 +251,7 @@ Device::Device(std::nullptr_t) { shader_storage_alignment = 4; max_vertex_attributes = 16; max_varyings = 15; + max_compute_shared_memory_size = 0x10000; has_warp_intrinsics = true; has_shader_ballot = true; has_vertex_viewport_layer = true; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e1d811966..8a4b6b9fc 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -52,6 +52,10 @@ public: return max_varyings; } + u32 GetMaxComputeSharedMemorySize() const { + return max_compute_shared_memory_size; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -104,6 +108,10 @@ public: return use_assembly_shaders; } + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -114,6 +122,7 @@ private: std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; + u32 max_compute_shared_memory_size{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; @@ -127,6 +136,7 @@ private: bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; bool use_assembly_shaders{}; + bool use_asynchronous_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index ec5421afa..b532fdcc2 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -4,16 +4,17 @@ #include "common/assert.h" +#include <glad/glad.h> + #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_fence_manager.h" namespace OpenGL { -GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {} +GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {} GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {} + : FenceBase(address, payload, is_stubbed) {} GLInnerFence::~GLInnerFence() = default; @@ -44,11 +45,10 @@ void GLInnerFence::Wait() { glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); } -FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, - VideoCore::RasterizerInterface& rasterizer, +FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, QueryCache& query_cache) - : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {} + : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {} Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { return std::make_shared<GLInnerFence>(value, is_stubbed); diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index c917b3343..da1dcdace 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -5,7 +5,6 @@ #pragma once #include <memory> -#include <glad/glad.h> #include "common/common_types.h" #include "video_core/fence_manager.h" @@ -38,9 +37,9 @@ using GenericFenceManager = class FenceManagerOpenGL final : public GenericFenceManager { public: - FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, - QueryCache& query_cache); + explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, + QueryCache& query_cache); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index d7ba57aca..2bb8ec2b8 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -30,12 +30,13 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { } // Anonymous namespace -QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) +QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory) : VideoCommon::QueryCacheBase< QueryCache, CachedQuery, CounterStream, HostCounter, - std::vector<OGLQuery>>{system, - static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)}, - gl_rasterizer{gl_rasterizer} {} + std::vector<OGLQuery>>{static_cast<VideoCore::RasterizerInterface&>(rasterizer), + maxwell3d, gpu_memory}, + gl_rasterizer{rasterizer} {} QueryCache::~QueryCache() = default; diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index d8e7052a1..dd626b66b 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -29,7 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter, std::vector<OGLQuery>> { public: - explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory); ~QueryCache(); OGLQuery AllocateQuery(VideoCore::QueryType type); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e960a0ef1..bbb2eb17c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,17 +139,33 @@ void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } +void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { + if (num_entries == 0) { + return; + } + if (num_entries % 2 == 1) { + pointers[num_entries] = 0; + } + const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); + glProgramLocalParametersI4uivNV(target, 0, num_vectors, + reinterpret_cast<const GLuint*>(pointers)); +} + } // Anonymous namespace -RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - const Device& device, ScreenInfo& info, - ProgramManager& program_manager, StateTracker& state_tracker) - : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device, - state_tracker}, - shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, - fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, - screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, + Core::Memory::Memory& cpu_memory, const Device& device_, + ScreenInfo& screen_info_, ProgramManager& program_manager_, + StateTracker& state_tracker_) + : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), + kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), + screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), + texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), + shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device), + query_cache(*this, maxwell3d, gpu_memory), + buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE), + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), + async_shaders(emu_window) { CheckExtensions(); unified_uniform_buffer.Create(); @@ -162,6 +178,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind nullptr, 0); } } + + if (device.UseAsynchronousShaders()) { + async_shaders.AllocateWorkers(); + } } RasterizerOpenGL::~RasterizerOpenGL() { @@ -179,8 +199,7 @@ void RasterizerOpenGL::CheckExtensions() { } void RasterizerOpenGL::SetupVertexFormat() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexFormats]) { return; } @@ -200,7 +219,7 @@ void RasterizerOpenGL::SetupVertexFormat() { } flags[Dirty::VertexFormat0 + index] = false; - const auto attrib = gpu.regs.vertex_attrib_format[index]; + const auto attrib = maxwell3d.regs.vertex_attrib_format[index]; const auto gl_index = static_cast<GLuint>(index); // Disable constant attributes. @@ -224,8 +243,7 @@ void RasterizerOpenGL::SetupVertexFormat() { } void RasterizerOpenGL::SetupVertexBuffer() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexBuffers]) { return; } @@ -236,7 +254,7 @@ void RasterizerOpenGL::SetupVertexBuffer() { const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); // Upload all guest vertex arrays sequentially to our buffer - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -273,14 +291,13 @@ void RasterizerOpenGL::SetupVertexBuffer() { } void RasterizerOpenGL::SetupVertexInstances() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexInstances]) { return; } flags[Dirty::VertexInstances] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexInstance0 + index]) { continue; @@ -296,7 +313,7 @@ void RasterizerOpenGL::SetupVertexInstances() { GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t size = CalculateIndexBufferSize(); const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); @@ -305,16 +322,14 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); - auto& gpu = system.GPU().Maxwell3D(); - std::size_t num_ssbos = 0; u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = gpu.regs.shader_config[index]; + const auto& shader_config = maxwell3d.regs.shader_config[index]; const auto program{static_cast<Maxwell::ShaderProgram>(index)}; // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { switch (program) { case Maxwell::ShaderProgram::Geometry: program_manager.UseGeometryShader(0); @@ -329,31 +344,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } // Currently this stages are not supported in the OpenGL backend. - // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl) { - continue; - } else if (program == Maxwell::ShaderProgram::TesselationEval) { + // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL + if (program == Maxwell::ShaderProgram::TesselationControl || + program == Maxwell::ShaderProgram::TesselationEval) { continue; } - Shader* const shader = shader_cache.GetStageProgram(program); - - if (device.UseAssemblyShaders()) { - // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this - // all stages share the same bindings. - const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); - ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); - num_ssbos += num_stage_ssbos; - } - - // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; - SetupDrawConstBuffers(stage, shader); - SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); + Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->GetHandle(); + const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: @@ -370,6 +369,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { shader_config.enable.Value(), shader_config.offset); } + // Stage indices are 0 - 5 + const std::size_t stage = index == 0 ? 0 : index - 1; + SetupDrawConstBuffers(stage, shader); + SetupDrawGlobalMemory(stage, shader); + SetupDrawTextures(stage, shader); + SetupDrawImages(stage, shader); + // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the @@ -384,11 +390,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } SyncClipEnabled(clip_distances); - gpu.dirty.flags[Dirty::Shaders] = false; + maxwell3d.dirty.flags[Dirty::Shaders] = false; } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; std::size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { @@ -406,34 +412,27 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { } std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; - - return static_cast<std::size_t>(regs.index_array.count) * - static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); + return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * + static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); } -void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, +void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(stop_loading, callback); -} - -void RasterizerOpenGL::SetupDirtyFlags() { - state_tracker.Initialize(); + shader_cache.LoadDiskCache(title_id, stop_loading, callback); } void RasterizerOpenGL::ConfigureFramebuffers() { MICROPROFILE_SCOPE(OpenGL_Framebuffer); - auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) { + if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { return; } - gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; + maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; texture_cache.GuardRenderTargets(true); View depth_surface = texture_cache.GetDepthBufferSurface(true); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); // Bind the framebuffer surfaces @@ -465,8 +464,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { } void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { - auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; texture_cache.GuardRenderTargets(true); View color_surface; @@ -516,12 +514,11 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_de } void RasterizerOpenGL::Clear() { - const auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.ShouldExecute()) { + if (!maxwell3d.ShouldExecute()) { return; } - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; bool use_color{}; bool use_depth{}; bool use_stencil{}; @@ -586,7 +583,6 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); - auto& gpu = system.GPU().Maxwell3D(); query_cache.UpdateCounters(); @@ -634,7 +630,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (invalidated) { // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty - auto& dirty = gpu.dirty.flags; + auto& dirty = maxwell3d.dirty.flags; dirty[Dirty::VertexBuffers] = true; for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { dirty[index] = true; @@ -655,7 +651,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup emulation uniform buffer. if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; - ubo.SetFromRegs(gpu); + ubo.SetFromRegs(maxwell3d); const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, @@ -664,7 +660,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. texture_cache.GuardSamplers(true); - const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); SetupShaders(primitive_mode); texture_cache.GuardSamplers(false); @@ -681,14 +677,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(primitive_mode); - const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); + const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = - static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); + static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1); if (is_indexed) { - const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); - const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); + const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); + const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); - const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); + const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { glDrawElements(primitive_mode, num_vertices, format, offset); } else if (num_instances == 1 && base_instance == 0) { @@ -707,8 +703,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { base_instance); } } else { - const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); - const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); + const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first); + const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count); if (num_instances == 1 && base_instance == 0) { glDrawArrays(primitive_mode, base_vertex, num_vertices); } else if (base_instance == 0) { @@ -723,7 +719,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { ++num_queued_commands; - system.GPU().TickWork(); + gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { @@ -731,6 +727,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { current_cbuf = 0; auto kernel = shader_cache.GetComputeKernel(code_addr); + program_manager.BindCompute(kernel->GetHandle()); + SetupComputeTextures(kernel); SetupComputeImages(kernel); @@ -744,7 +742,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; @@ -807,17 +805,14 @@ void RasterizerOpenGL::SyncGuestHost() { } void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { - auto& memory_manager{gpu.MemoryManager()}; - memory_manager.Write<u32>(addr, value); + gpu_memory.Write<u32>(addr, value); return; } fence_manager.SignalSemaphore(addr, value); } void RasterizerOpenGL::SignalSyncPoint(u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { gpu.IncrementSyncPoint(value); return; @@ -826,7 +821,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) { } void RasterizerOpenGL::ReleaseFences() { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { return; } @@ -912,7 +906,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& stages = system.GPU().Maxwell3D().state.shader_stages; + const auto& stages = maxwell3d.state.shader_stages; const auto& shader_stage = stages[stage_index]; const auto& entries = shader->GetEntries(); const bool use_unified = entries.use_unified_uniforms; @@ -937,7 +931,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; const auto& entries = kernel->GetEntries(); const bool use_unified = entries.use_unified_uniforms; @@ -1005,45 +999,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + static constexpr std::array TARGET_LUT = { + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + + const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; + const auto& entries{shader->GetEntries().global_memory_entries}; - u32 binding = - device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; - for (const auto& entry : shader->GetEntries().global_memory_entries) { + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); + + const bool assembly_shaders = device.UseAssemblyShaders(); + u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; + for (const auto& entry : entries) { const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; - const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; - const u32 size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; + const u32 size{gpu_memory.Read<u32>(addr + 8)}; + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (assembly_shaders) { + UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; + const auto& entries{kernel->GetEntries().global_memory_entries}; + + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); u32 binding = 0; - for (const auto& entry : kernel->GetEntries().global_memory_entries) { - const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; - const auto gpu_addr{memory_manager.Read<u64>(addr)}; - const auto size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + for (const auto& entry : entries) { + const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; + const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; + const u32 size{gpu_memory.Read<u32>(addr + 8)}; + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (device.UseAssemblyShaders()) { + UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, - GPUVAddr gpu_addr, std::size_t size) { - const auto alignment{device.GetShaderStorageBufferAlignment()}; + GPUVAddr gpu_addr, std::size_t size, + GLuint64EXT* pointer) { + const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, - static_cast<GLsizeiptr>(size)); + if (device.UseAssemblyShaders()) { + *pointer = info.address + info.offset; + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, + static_cast<GLsizeiptr>(size)); + } } void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { MICROPROFILE_SCOPE(OpenGL_Texture); - const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetEntries().samplers) { const auto shader_type = static_cast<ShaderType>(stage_index); @@ -1056,11 +1071,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); - const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetEntries().samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); + const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); SetupTexture(binding++, texture, entry); } } @@ -1084,20 +1098,18 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu } void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { - const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).image; for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); + const auto shader_type = static_cast<ShaderType>(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; SetupImage(binding++, tic, entry); } } void RasterizerOpenGL::SetupComputeImages(Shader* shader) { - const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : shader->GetEntries().images) { - const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; + const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; SetupImage(binding++, tic, entry); } } @@ -1117,9 +1129,8 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t } void RasterizerOpenGL::SyncViewport() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; - const auto& regs = gpu.regs; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; const bool dirty_viewport = flags[Dirty::Viewports]; const bool dirty_clip_control = flags[Dirty::ClipControl]; @@ -1191,25 +1202,23 @@ void RasterizerOpenGL::SyncViewport() { } void RasterizerOpenGL::SyncDepthClamp() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::DepthClampEnabled]) { return; } flags[Dirty::DepthClampEnabled] = false; - oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0); + oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0); } void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; - clip_mask &= gpu.regs.clip_distance_enabled; + clip_mask &= maxwell3d.regs.clip_distance_enabled; if (clip_mask == last_clip_distance_mask) { return; } @@ -1225,9 +1234,8 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; - const auto& regs = gpu.regs; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; if (flags[Dirty::CullTest]) { flags[Dirty::CullTest] = false; @@ -1242,26 +1250,24 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncPrimitiveRestart() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PrimitiveRestart]) { return; } flags[Dirty::PrimitiveRestart] = false; - if (gpu.regs.primitive_restart.enabled) { + if (maxwell3d.regs.primitive_restart.enabled) { glEnable(GL_PRIMITIVE_RESTART); - glPrimitiveRestartIndex(gpu.regs.primitive_restart.index); + glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index); } else { glDisable(GL_PRIMITIVE_RESTART); } } void RasterizerOpenGL::SyncDepthTestState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; - const auto& regs = gpu.regs; if (flags[Dirty::DepthMask]) { flags[Dirty::DepthMask] = false; glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE); @@ -1279,14 +1285,13 @@ void RasterizerOpenGL::SyncDepthTestState() { } void RasterizerOpenGL::SyncStencilTestState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::StencilTest]) { return; } flags[Dirty::StencilTest] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_STENCIL_TEST, regs.stencil_enable); glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func), @@ -1311,25 +1316,24 @@ void RasterizerOpenGL::SyncStencilTestState() { } void RasterizerOpenGL::SyncRasterizeEnable() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::RasterizeEnable]) { return; } flags[Dirty::RasterizeEnable] = false; - oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0); + oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0); } void RasterizerOpenGL::SyncPolygonModes() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PolygonModes]) { return; } flags[Dirty::PolygonModes] = false; - if (gpu.regs.fill_rectangle) { + const auto& regs = maxwell3d.regs; + if (regs.fill_rectangle) { if (!GLAD_GL_NV_fill_rectangle) { LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported"); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); @@ -1342,27 +1346,26 @@ void RasterizerOpenGL::SyncPolygonModes() { return; } - if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) { + if (regs.polygon_mode_front == regs.polygon_mode_back) { flags[Dirty::PolygonModeFront] = false; flags[Dirty::PolygonModeBack] = false; - glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); + glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front)); return; } if (flags[Dirty::PolygonModeFront]) { flags[Dirty::PolygonModeFront] = false; - glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); + glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front)); } if (flags[Dirty::PolygonModeBack]) { flags[Dirty::PolygonModeBack] = false; - glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back)); + glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back)); } } void RasterizerOpenGL::SyncColorMask() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::ColorMasks]) { return; } @@ -1371,7 +1374,7 @@ void RasterizerOpenGL::SyncColorMask() { const bool force = flags[Dirty::ColorMaskCommon]; flags[Dirty::ColorMaskCommon] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; if (regs.color_mask_common) { if (!force && !flags[Dirty::ColorMask0]) { return; @@ -1396,33 +1399,30 @@ void RasterizerOpenGL::SyncColorMask() { } void RasterizerOpenGL::SyncMultiSampleState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::MultisampleControl]) { return; } flags[Dirty::MultisampleControl] = false; - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage); oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one); } void RasterizerOpenGL::SyncFragmentColorClampState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::FragmentClampColor]) { return; } flags[Dirty::FragmentClampColor] = false; - glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); + glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); } void RasterizerOpenGL::SyncBlendState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; - const auto& regs = gpu.regs; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; if (flags[Dirty::BlendColor]) { flags[Dirty::BlendColor] = false; @@ -1479,14 +1479,13 @@ void RasterizerOpenGL::SyncBlendState() { } void RasterizerOpenGL::SyncLogicOpState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::LogicOp]) { return; } flags[Dirty::LogicOp] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; if (regs.logic_op.enable) { glEnable(GL_COLOR_LOGIC_OP); glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation)); @@ -1496,14 +1495,13 @@ void RasterizerOpenGL::SyncLogicOpState() { } void RasterizerOpenGL::SyncScissorTest() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::Scissors]) { return; } flags[Dirty::Scissors] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { if (!flags[Dirty::Scissor0 + index]) { continue; @@ -1522,16 +1520,15 @@ void RasterizerOpenGL::SyncScissorTest() { } void RasterizerOpenGL::SyncPointState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PointSize]) { return; } flags[Dirty::PointSize] = false; - oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable); + oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); - if (gpu.regs.vp_point_size.enable) { + if (maxwell3d.regs.vp_point_size.enable) { // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. glEnable(GL_PROGRAM_POINT_SIZE); return; @@ -1539,32 +1536,30 @@ void RasterizerOpenGL::SyncPointState() { // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). - glPointSize(std::max(1.0f, gpu.regs.point_size)); + glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncLineState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::LineWidth]) { return; } flags[Dirty::LineWidth] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable); glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased); } void RasterizerOpenGL::SyncPolygonOffset() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PolygonOffset]) { return; } flags[Dirty::PolygonOffset] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable); oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable); oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable); @@ -1578,14 +1573,13 @@ void RasterizerOpenGL::SyncPolygonOffset() { } void RasterizerOpenGL::SyncAlphaTest() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::AlphaTest]) { return; } flags[Dirty::AlphaTest] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; if (regs.alpha_test_enabled && regs.rt_control.count > 1) { LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); } @@ -1599,20 +1593,19 @@ void RasterizerOpenGL::SyncAlphaTest() { } void RasterizerOpenGL::SyncFramebufferSRGB() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::FramebufferSRGB]) { return; } flags[Dirty::FramebufferSRGB] = false; - oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); + oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } void RasterizerOpenGL::SyncTransformFeedback() { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; static constexpr std::size_t STRIDE = 3; std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; @@ -1664,7 +1657,7 @@ void RasterizerOpenGL::SyncTransformFeedback() { } void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -1707,7 +1700,7 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f082592f..f451404b2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -33,10 +33,11 @@ #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" -namespace Core { -class System; +namespace Core::Memory { +class Memory; } namespace Core::Frontend { @@ -54,9 +55,10 @@ struct DrawParameters; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - const Device& device, ScreenInfo& info, - ProgramManager& program_manager, StateTracker& state_tracker); + explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + Core::Memory::Memory& cpu_memory, const Device& device, + ScreenInfo& screen_info, ProgramManager& program_manager, + StateTracker& state_tracker); ~RasterizerOpenGL() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -82,15 +84,22 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - void LoadDiskResources(const std::atomic_bool& stop_loading, + void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; - void SetupDirtyFlags() override; /// Returns true when there are commands queued to the OpenGL server. bool AnyCommandQueued() const { return num_queued_commands > 0; } + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -115,9 +124,9 @@ private: /// Configures the current global memory entries to use for the kernel invocation. void SetupComputeGlobalMemory(Shader* kernel); - /// Configures a constant buffer. + /// Configures a global memory buffer. void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, - std::size_t size); + std::size_t size, GLuint64EXT* pointer); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, Shader* shader); @@ -228,7 +237,15 @@ private: void SetupShaders(GLenum primitive_mode); + Tegra::GPU& gpu; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + const Device& device; + ScreenInfo& screen_info; + ProgramManager& program_manager; + StateTracker& state_tracker; TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; @@ -238,10 +255,7 @@ private: OGLBufferCache buffer_cache; FenceManagerOpenGL fence_manager; - Core::System& system; - ScreenInfo& screen_info; - ProgramManager& program_manager; - StateTracker& state_tracker; + VideoCommon::Shader::AsyncShaders async_shaders; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index a787e27d2..0ebcec427 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <string_view> #include <utility> #include <glad/glad.h> #include "common/common_types.h" @@ -82,11 +83,13 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(const char* source, GLenum type) { - if (handle != 0) +void OGLShader::Create(std::string_view source, GLenum type) { + if (handle != 0) { return; - if (source == nullptr) + } + if (source.empty()) { return; + } MICROPROFILE_SCOPE(OpenGL_ResourceCreation); handle = GLShader::LoadShader(source, type); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f8b322227..f48398669 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -4,6 +4,7 @@ #pragma once +#include <string_view> #include <utility> #include <glad/glad.h> #include "common/common_types.h" @@ -127,7 +128,7 @@ public: return *this; } - void Create(const char* source, GLenum type); + void Create(std::string_view source, GLenum type); void Release(); @@ -177,6 +178,12 @@ public: Release(); } + OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c6a3bf3a1..bd56bed0c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" @@ -31,6 +32,7 @@ #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace OpenGL { @@ -125,7 +127,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, entry.graphics_info, entry.compute_info}; - const auto registry = std::make_shared<Registry>(entry.type, info); + auto registry = std::make_shared<Registry>(entry.type, info); for (const auto& [address, value] : entry.keys) { const auto [buffer, offset] = address; registry->InsertKey(buffer, offset, value); @@ -140,9 +142,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { return registry; } +std::unordered_set<GLenum> GetSupportedFormats() { + GLint num_formats; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector<GLint> formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { + supported_formats.insert(static_cast<GLenum>(format)); + } + return supported_formats; +} + +} // Anonymous namespace + ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, - bool hint_retrievable = false) { + const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { const std::string shader_id = MakeShaderID(unique_identifier, shader_type); LOG_INFO(Render_OpenGL, "{}", shader_id); @@ -181,30 +198,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u return program; } -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); - } - return supported_formats; -} - -} // Anonymous namespace - Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { + ProgramSharedPtr program_, bool is_built) + : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, + is_built(is_built) { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; } - ASSERT(handle != 0); + if (is_built) { + ASSERT(handle != 0); + } } Shader::~Shader() = default; @@ -214,43 +218,78 @@ GLuint Shader::GetHandle() const { return handle; } -std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode code, ProgramCode code_b) { +bool Shader::IsBuilt() const { + return is_built; +} + +void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { + program->source_program = std::move(new_program); + handle = program->source_program.handle; + is_built = true; +} + +void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { + program->assembly_program = std::move(new_program); + handle = program->assembly_program.handle; + is_built = true; +} + +std::unique_ptr<Shader> Shader::CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, + ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { const auto shader_type = GetShaderType(program_type); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + auto& gpu = params.gpu; + gpu.ShaderNotify().MarkSharderBuilding(); + + auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); + if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!code_b.empty()) { + // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); + // } + auto program = + BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + ShaderDiskCacheEntry entry; + entry.type = shader_type; + entry.code = std::move(code); + entry.code_b = std::move(code_b); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + gpu.ShaderNotify().MarkShaderComplete(); + + return std::unique_ptr<Shader>(new Shader(std::move(registry), + MakeEntries(params.device, ir, shader_type), + std::move(program), true)); + } else { + // Required for entries + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + auto entries = MakeEntries(params.device, ir, shader_type); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); + async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, + std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, + COMPILER_SETTINGS, *registry, cpu_addr); - return std::unique_ptr<Shader>(new Shader( - std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); + auto program = std::make_shared<ProgramHandle>(); + return std::unique_ptr<Shader>( + new Shader(std::move(registry), std::move(entries), std::move(program), false)); + } } std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { - const std::size_t size_in_bytes = code.size() * sizeof(u64); + auto& gpu = params.gpu; + gpu.ShaderNotify().MarkSharderBuilding(); - auto& engine = params.system.GPU().KeplerCompute(); - auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); + auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const u64 uid = params.unique_identifier; auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); @@ -266,6 +305,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); + gpu.ShaderNotify().MarkShaderComplete(); + return std::unique_ptr<Shader>(new Shader(std::move(registry), MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); @@ -277,15 +318,20 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); } -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - Core::Frontend::EmuWindow& emu_window, const Device& device) - : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, - emu_window{emu_window}, device{device}, disk_cache{system} {} +ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, + Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_) + : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, device{device_} {} ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, +void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { + disk_cache.BindTitleID(title_id); const std::optional transferable = disk_cache.LoadTransferable(); if (!transferable) { return; @@ -361,7 +407,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } }; - const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; + const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; const std::size_t bucket_size{transferable->size() / num_workers}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); std::vector<std::thread> threads(num_workers); @@ -436,42 +482,77 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( return program; } -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { - return last_shaders[static_cast<std::size_t>(program)]; +Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders) { + if (!maxwell3d.dirty.flags[Dirty::Shaders]) { + auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; + if (last_shader->IsBuilt()) { + return last_shader; + } } - auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr address{GetShaderAddress(system, program)}; + const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; + + if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { + auto completed_work = async_shaders.GetCompletedWork(); + for (auto& work : completed_work) { + Shader* shader = TryGet(work.cpu_address); + gpu.ShaderNotify().MarkShaderComplete(); + if (shader == nullptr) { + continue; + } + using namespace VideoCommon::Shader; + if (work.backend == AsyncShaders::Backend::OpenGL) { + shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + shader->AsyncGLASMBuilt(std::move(work.program.glasm)); + } + + auto& registry = shader->GetRegistry(); + + ShaderDiskCacheEntry entry; + entry.type = work.shader_type; + entry.code = std::move(work.code); + entry.code_b = std::move(work.code_b); + entry.unique_identifier = work.uid; + entry.bound_buffer = registry.GetBoundBuffer(); + entry.graphics_info = registry.GetGraphicsInfo(); + entry.keys = registry.GetKeys(); + entry.bound_samplers = registry.GetBoundSamplers(); + entry.bindless_samplers = registry.GetBindlessSamplers(); + disk_cache.SaveEntry(std::move(entry)); + } + } // Look up shader in the cache based on address - const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; + const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { return last_shaders[static_cast<std::size_t>(program)] = shader; } - const auto host_ptr{memory_manager.GetPointer(address)}; + const u8* const host_ptr{gpu_memory.GetPointer(address)}; // No shader found - create a new one - ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)}; + ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; ProgramCode code_b; if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = memory_manager.GetPointer(address_b); - code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); + const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; + const u8* host_ptr_b = gpu_memory.GetPointer(address_b); + code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); } const std::size_t code_size = code.size() * sizeof(u64); const u64 unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - const ShaderParameters params{system, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; + const ShaderParameters params{gpu, maxwell3d, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; std::unique_ptr<Shader> shader; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); + shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), + async_shaders, cpu_addr.value_or(0)); } else { shader = Shader::CreateFromCache(params, found->second); } @@ -487,21 +568,20 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { } Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; + const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { return kernel; } - const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one - ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; + const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; + ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; const std::size_t code_size{code.size() * sizeof(u64)}; const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - const ShaderParameters params{system, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; + const ShaderParameters params{gpu, kepler_compute, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; std::unique_ptr<Shader> kernel; const auto found = runtime_cache.find(unique_identifier); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 994aaeaf2..1708af06a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -25,14 +25,18 @@ #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" -namespace Core { -class System; +namespace Tegra { +class MemoryManager; } namespace Core::Frontend { class EmuWindow; } +namespace VideoCommon::Shader { +class AsyncShaders; +} + namespace OpenGL { class Device; @@ -53,14 +57,20 @@ struct PrecompiledShader { }; struct ShaderParameters { - Core::System& system; + Tegra::GPU& gpu; + Tegra::Engines::ConstBufferEngineInterface& engine; ShaderDiskCacheOpenGL& disk_cache; const Device& device; VAddr cpu_addr; - u8* host_ptr; + const u8* host_ptr; u64 unique_identifier; }; +ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, + u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + bool hint_retrievable = false); + class Shader final { public: ~Shader(); @@ -68,15 +78,28 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; + bool IsBuilt() const; + /// Gets the shader entries for the shader const ShaderEntries& GetEntries() const { return entries; } - static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, - ProgramCode program_code_b); + const VideoCommon::Shader::Registry& GetRegistry() const { + return *registry; + } + + /// Mark a OpenGL shader as built + void AsyncOpenGLBuilt(OGLProgram new_program); + + /// Mark a GLASM shader as built + void AsyncGLASMBuilt(OGLAssemblyProgram new_program); + + static std::unique_ptr<Shader> CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, + ProgramCode program_code, ProgramCode program_code_b, + VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); + static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); @@ -85,26 +108,30 @@ public: private: explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program); + ProgramSharedPtr program, bool is_built = true); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; ProgramSharedPtr program; GLuint handle = 0; + bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - Core::Frontend::EmuWindow& emu_window, const Device& device); + explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window, + Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device); ~ShaderCacheOpenGL() override; /// Loads disk cache for the current game - void LoadDiskCache(const std::atomic_bool& stop_loading, + void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback); /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program); + Shader* GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders); /// Gets a compute kernel in the passed address Shader* GetComputeKernel(GPUVAddr code_addr); @@ -114,9 +141,13 @@ private: const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const std::unordered_set<GLenum>& supported_formats); - Core::System& system; Core::Frontend::EmuWindow& emu_window; + Tegra::GPU& gpu; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; + ShaderDiskCacheOpenGL disk_cache; std::unordered_map<u64, PrecompiledShader> runtime_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2c49aeaac..3f75fcd2b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -602,8 +602,15 @@ private: return; } const auto& info = registry.GetComputeInfo(); - if (const u32 size = info.shared_memory_size_in_words; size > 0) { - code.AddLine("shared uint smem[{}];", size); + if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { + const u32 limit = device.GetMaxComputeSharedMemorySize(); + if (size > limit) { + LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", + size, limit); + size = limit; + } + + code.AddLine("shared uint smem[{}];", size / 4); code.AddNewLine(); } code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", @@ -1912,7 +1919,7 @@ private: Expression Comparison(Operation operation) { static_assert(!unordered || type == Type::Float); - const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); + Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's @@ -1952,10 +1959,6 @@ private: return {fmt::format("({} != 0)", carry), Type::Bool}; } - Expression LogicalFIsNan(Operation operation) { - return GenerateUnary(operation, "isnan", Type::Bool, Type::Float); - } - Expression LogicalAssign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -2771,15 +2774,6 @@ private: return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } - bool IsRenderTargetEnabled(u32 render_target) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - return true; - } - } - return false; - } - const Device& device; const ShaderIR& ir; const Registry& registry; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 2dcc2b0eb..166ee34e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -73,7 +73,7 @@ ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; -bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { +bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { return false; } @@ -144,7 +144,7 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { return true; } -bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { +bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { if (file.WriteObject(static_cast<u32>(type)) != 1 || file.WriteObject(static_cast<u32>(code.size())) != 1 || file.WriteObject(static_cast<u32>(code_b.size())) != 1) { @@ -206,28 +206,32 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { flat_bindless_samplers.size(); } -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} +ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; +void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { + title_id = title_id_; +} + std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { // Skip games without title id - const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; + const bool has_title_id = title_id != 0; if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return {}; + return std::nullopt; } - FileUtil::IOFile file(GetTransferablePath(), "rb"); + Common::FS::IOFile file(GetTransferablePath(), "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No transferable shader cache found"); is_usable = true; - return {}; + return std::nullopt; } u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return {}; + return std::nullopt; } if (version < NativeVersion) { @@ -235,12 +239,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran file.Close(); InvalidateTransferable(); is_usable = true; - return {}; + return std::nullopt; } if (version > NativeVersion) { LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " "of the emulator, skipping"); - return {}; + return std::nullopt; } // Version is valid, load the shaders @@ -249,7 +253,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran ShaderDiskCacheEntry& entry = entries.emplace_back(); if (!entry.Load(file)) { LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return {}; + return std::nullopt; } } @@ -262,7 +266,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() return {}; } - FileUtil::IOFile file(GetPrecompiledPath(), "rb"); + Common::FS::IOFile file(GetPrecompiledPath(), "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); return {}; @@ -279,7 +283,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() } std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - FileUtil::IOFile& file) { + Common::FS::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); file.ReadBytes(compressed.data(), compressed.size()); @@ -290,12 +294,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo ShaderCacheVersionHash file_hash{}; if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { precompiled_cache_virtual_file_offset = 0; - return {}; + return std::nullopt; } if (GetShaderCacheVersionHash() != file_hash) { LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); precompiled_cache_virtual_file_offset = 0; - return {}; + return std::nullopt; } std::vector<ShaderDiskCachePrecompiled> entries; @@ -305,19 +309,20 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo if (!LoadObjectFromPrecompiled(entry.unique_identifier) || !LoadObjectFromPrecompiled(entry.binary_format) || !LoadObjectFromPrecompiled(binary_size)) { - return {}; + return std::nullopt; } entry.binary.resize(binary_size); if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return {}; + return std::nullopt; } } - return entries; + + return std::move(entries); } void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if (!FileUtil::Delete(GetTransferablePath())) { + if (!Common::FS::Delete(GetTransferablePath())) { LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", GetTransferablePath()); } @@ -328,7 +333,7 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { // Clear virtaul precompiled cache file precompiled_cache_virtual_file.Resize(0); - if (!FileUtil::Delete(GetPrecompiledPath())) { + if (!Common::FS::Delete(GetPrecompiledPath())) { LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath()); } } @@ -344,7 +349,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { return; } - FileUtil::IOFile file = AppendTransferableFile(); + Common::FS::IOFile file = AppendTransferableFile(); if (!file.IsOpen()) { return; } @@ -386,15 +391,15 @@ void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint progra } } -FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { +Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { if (!EnsureDirectories()) { return {}; } const auto transferable_path{GetTransferablePath()}; - const bool existed = FileUtil::Exists(transferable_path); + const bool existed = Common::FS::Exists(transferable_path); - FileUtil::IOFile file(transferable_path, "ab"); + Common::FS::IOFile file(transferable_path, "ab"); if (!file.IsOpen()) { LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path); return {}; @@ -426,7 +431,7 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); const auto precompiled_path{GetPrecompiledPath()}; - FileUtil::IOFile file(precompiled_path, "wb"); + Common::FS::IOFile file(precompiled_path, "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path); @@ -440,24 +445,24 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { bool ShaderDiskCacheOpenGL::EnsureDirectories() const { const auto CreateDir = [](const std::string& dir) { - if (!FileUtil::CreateDir(dir)) { + if (!Common::FS::CreateDir(dir)) { LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir); return false; } return true; }; - return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) && + return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) && CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && CreateDir(GetPrecompiledDir()); } std::string ShaderDiskCacheOpenGL::GetTransferablePath() const { - return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); + return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); } std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); + return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); } std::string ShaderDiskCacheOpenGL::GetTransferableDir() const { @@ -469,11 +474,11 @@ std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const { } std::string ShaderDiskCacheOpenGL::GetBaseDir() const { - return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl"; + return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl"; } std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID()); + return fmt::format("{:016X}", title_id); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index a79cef0e9..aef841c1d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -21,11 +21,7 @@ #include "video_core/engines/shader_type.h" #include "video_core/shader/registry.h" -namespace Core { -class System; -} - -namespace FileUtil { +namespace Common::FS { class IOFile; } @@ -38,9 +34,9 @@ struct ShaderDiskCacheEntry { ShaderDiskCacheEntry(); ~ShaderDiskCacheEntry(); - bool Load(FileUtil::IOFile& file); + bool Load(Common::FS::IOFile& file); - bool Save(FileUtil::IOFile& file) const; + bool Save(Common::FS::IOFile& file) const; bool HasProgramA() const { return !code.empty() && !code_b.empty(); @@ -70,9 +66,12 @@ struct ShaderDiskCachePrecompiled { class ShaderDiskCacheOpenGL { public: - explicit ShaderDiskCacheOpenGL(Core::System& system); + explicit ShaderDiskCacheOpenGL(); ~ShaderDiskCacheOpenGL(); + /// Binds a title ID for all future operations. + void BindTitleID(u64 title_id); + /// Loads transferable cache. If file has a old version or on failure, it deletes the file. std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); @@ -97,10 +96,10 @@ public: private: /// Loads the transferable cache. Returns empty on failure. std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( - FileUtil::IOFile& file); + Common::FS::IOFile& file); /// Opens current game's transferable file and write it's header if it doesn't exist - FileUtil::IOFile AppendTransferableFile() const; + Common::FS::IOFile AppendTransferableFile() const; /// Save precompiled header to precompiled_cache_in_memory void SavePrecompiledHeaderToVirtualPrecompiledCache(); @@ -157,8 +156,6 @@ private: return LoadArrayFromPrecompiled(&object, 1); } - Core::System& system; - // Stores whole precompiled cache which will be read from or saved to the precompiled chache // file FileSys::VectorVfsFile precompiled_cache_virtual_file; @@ -168,8 +165,11 @@ private: // Stored transferable shaders std::unordered_set<u64> stored_transferable; + /// Title ID to operate on + u64 title_id = 0; + // The cache has been loaded at boot - bool is_usable{}; + bool is_usable = false; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8e754fa90..691c6c79b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -11,8 +11,30 @@ namespace OpenGL { -ProgramManager::ProgramManager(const Device& device) { - use_assembly_programs = device.UseAssemblyShaders(); +namespace { + +void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { + if (current == old) { + return; + } + if (current == 0) { + if (enabled) { + enabled = false; + glDisable(stage); + } + return; + } + if (!enabled) { + enabled = true; + glEnable(stage); + } + glBindProgramARB(stage, current); +} + +} // Anonymous namespace + +ProgramManager::ProgramManager(const Device& device) + : use_assembly_programs{device.UseAssemblyShaders()} { if (use_assembly_programs) { glEnable(GL_COMPUTE_PROGRAM_NV); } else { @@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) { } void ProgramManager::BindGraphicsPipeline() { - if (use_assembly_programs) { - UpdateAssemblyPrograms(); - } else { + if (!use_assembly_programs) { UpdateSourcePrograms(); } } @@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() { } } -void ProgramManager::UpdateAssemblyPrograms() { - const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); - }; +void ProgramManager::UseVertexShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); + } + current_state.vertex = program; +} - update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, - old_state.geometry); - update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, - old_state.fragment); +void ProgramManager::UseGeometryShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); + } + current_state.geometry = program; +} - old_state = current_state; +void ProgramManager::UseFragmentShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); + } + current_state.fragment = program; } void ProgramManager::UpdateSourcePrograms() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0f03b4f12..950e0dfcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,17 +45,9 @@ public: /// Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); - void UseVertexShader(GLuint program) { - current_state.vertex = program; - } - - void UseGeometryShader(GLuint program) { - current_state.geometry = program; - } - - void UseFragmentShader(GLuint program) { - current_state.fragment = program; - } + void UseVertexShader(GLuint program); + void UseGeometryShader(GLuint program); + void UseFragmentShader(GLuint program); private: struct PipelineState { @@ -64,9 +56,6 @@ private: GLuint fragment = 0; }; - /// Update NV_gpu_program5 programs. - void UpdateAssemblyPrograms(); - /// Update GLSL programs. void UpdateSourcePrograms(); diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 9e74eda0d..4bf0d6090 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <string_view> #include <vector> #include <glad/glad.h> #include "common/assert.h" @@ -11,7 +12,8 @@ namespace OpenGL::GLShader { namespace { -const char* GetStageDebugName(GLenum type) { + +std::string_view StageDebugName(GLenum type) { switch (type) { case GL_VERTEX_SHADER: return "vertex"; @@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) { UNIMPLEMENTED(); return "unknown"; } + } // Anonymous namespace -GLuint LoadShader(const char* source, GLenum type) { - const char* debug_type = GetStageDebugName(type); +GLuint LoadShader(std::string_view source, GLenum type) { + const std::string_view debug_type = StageDebugName(type); const GLuint shader_id = glCreateShader(type); - glShaderSource(shader_id, 1, &source, nullptr); + + const GLchar* source_string = source.data(); + const GLint source_length = static_cast<GLint>(source.size()); + + glShaderSource(shader_id, 1, &source_string, &source_length); LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); glCompileShader(shader_id); diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 03b7548c2..1b770532e 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) { * @param source String of the GLSL shader program * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) */ -GLuint LoadShader(const char* source, GLenum type); +GLuint LoadShader(std::string_view source, GLenum type); /** * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index d24fad3de..6bcf831f2 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -214,10 +214,8 @@ void SetupDirtyMisc(Tables& tables) { } // Anonymous namespace -StateTracker::StateTracker(Core::System& system) : system{system} {} - -void StateTracker::Initialize() { - auto& dirty = system.GPU().Maxwell3D().dirty; +StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { + auto& dirty = gpu.Maxwell3D().dirty; auto& tables = dirty.tables; SetupDirtyRenderTargets(tables); SetupDirtyColorMasks(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 0f823288e..9d127548f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -13,8 +13,8 @@ #include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" -namespace Core { -class System; +namespace Tegra { +class GPU; } namespace OpenGL { @@ -90,9 +90,7 @@ static_assert(Last <= std::numeric_limits<u8>::max()); class StateTracker { public: - explicit StateTracker(Core::System& system); - - void Initialize(); + explicit StateTracker(Tegra::GPU& gpu); void BindIndexBuffer(GLuint new_index_buffer) { if (index_buffer == new_index_buffer) { @@ -103,7 +101,6 @@ public: } void NotifyScreenDrawVertexArray() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true; flags[OpenGL::Dirty::VertexFormat0 + 1] = true; @@ -117,98 +114,81 @@ public: } void NotifyPolygonModes() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::PolygonModes] = true; flags[OpenGL::Dirty::PolygonModeFront] = true; flags[OpenGL::Dirty::PolygonModeBack] = true; } void NotifyViewport0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::Viewports] = true; flags[OpenGL::Dirty::Viewport0] = true; } void NotifyScissor0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::Scissors] = true; flags[OpenGL::Dirty::Scissor0] = true; } void NotifyColorMask0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::ColorMasks] = true; flags[OpenGL::Dirty::ColorMask0] = true; } void NotifyBlend0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::BlendStates] = true; flags[OpenGL::Dirty::BlendState0] = true; } void NotifyFramebuffer() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[VideoCommon::Dirty::RenderTargets] = true; } void NotifyFrontFace() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::FrontFace] = true; } void NotifyCullTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::CullTest] = true; } void NotifyDepthMask() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::DepthMask] = true; } void NotifyDepthTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::DepthTest] = true; } void NotifyStencilTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::StencilTest] = true; } void NotifyPolygonOffset() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::PolygonOffset] = true; } void NotifyRasterizeEnable() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::RasterizeEnable] = true; } void NotifyFramebufferSRGB() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::FramebufferSRGB] = true; } void NotifyLogicOp() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::LogicOp] = true; } void NotifyClipControl() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::ClipControl] = true; } void NotifyAlphaTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::AlphaTest] = true; } private: - Core::System& system; + Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; GLuint index_buffer = 0; }; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 3655ff629..887995cf4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver mapped_ptr = static_cast<u8*>( glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 61505879b..a863ef218 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -41,91 +41,103 @@ struct FormatTuple { }; constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45 - {GL_COMPRESSED_RED_RGTC1}, // DXN1 - {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16 - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG8UI - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8 - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5 - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5 + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8 + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6 + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10 + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12 + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6 + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5 + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { @@ -178,10 +190,10 @@ GLint GetSwizzleSource(SwizzleSource source) { GLenum GetComponent(PixelFormat format, bool is_first) { switch (format) { - case PixelFormat::Z24S8: - case PixelFormat::Z32FS8: + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; - case PixelFormat::S8Z24: + case PixelFormat::S8_UINT_D24_UNORM: return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; default: UNREACHABLE(); @@ -391,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() { LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); } -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { +void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); } @@ -482,9 +494,9 @@ GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_sou std::array swizzle{x_source, y_source, z_source, w_source}; switch (const PixelFormat format = GetSurfaceParams().pixel_format) { - case PixelFormat::Z24S8: - case PixelFormat::Z32FS8: - case PixelFormat::S8Z24: + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, GetComponent(format, x_source == SwizzleSource::R)); @@ -520,10 +532,12 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { return texture_view; } -TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, - VideoCore::RasterizerInterface& rasterizer, - const Device& device, StateTracker& state_tracker) - : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} { +TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, const Device& device, + StateTracker& state_tracker_) + : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{ + state_tracker_} { src_framebuffer.Create(); dst_framebuffer.Create(); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index bfc4ddf5d..7787134fc 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -90,7 +90,7 @@ public: Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source); - void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); + void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); void MarkAsModified(u64 tick) { surface.MarkAsModified(true, tick); @@ -129,8 +129,10 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: - explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const Device& device, StateTracker& state_tracker); + explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, const Device& device, + StateTracker& state_tracker); ~TextureCacheOpenGL(); protected: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e66cdc083..a4c5b8f74 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,6 +21,8 @@ #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -44,46 +46,6 @@ struct Frame { bool is_srgb{}; /// Framebuffer is sRGB or RGB }; -constexpr char VERTEX_SHADER[] = R"( -#version 430 core - -out gl_PerVertex { - vec4 gl_Position; -}; - -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec2 vert_tex_coord; -layout (location = 0) out vec2 frag_tex_coord; - -// This is a truncated 3x3 matrix for 2D transformations: -// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. -// The third column performs translation. -// The third row could be used for projection, which we don't need in 2D. It hence is assumed to -// implicitly be [0, 0, 1] -layout (location = 0) uniform mat3x2 modelview_matrix; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - -constexpr char FRAGMENT_SHADER[] = R"( -#version 430 core - -layout (location = 0) in vec2 frag_tex_coord; -layout (location = 0) out vec4 color; - -layout (binding = 0) uniform sampler2D color_texture; - -void main() { - color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); -} -)"; - constexpr GLint PositionLocation = 0; constexpr GLint TexCoordLocation = 1; constexpr GLint ModelViewMatrixLocation = 0; @@ -313,10 +275,13 @@ public: } }; -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, - Core::Frontend::GraphicsContext& context) - : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, - program_manager{device}, has_debug_tool{HasDebugTool()} {} +RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, + Core::Frontend::EmuWindow& emu_window_, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context) + : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_}, + emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device}, + has_debug_tool{HasDebugTool()} {} RendererOpenGL::~RendererOpenGL() = default; @@ -384,7 +349,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (has_debug_tool) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); Present(0); - context.SwapBuffers(); + context->SwapBuffers(); } } @@ -423,7 +388,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; - u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)}; + u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); // TODO(Rodrigo): Read this from HLE @@ -460,10 +425,10 @@ void RendererOpenGL::InitOpenGLObjects() { // Create shader programs OGLShader vertex_shader; - vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER); + vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); OGLShader fragment_shader; - fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER); + fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); vertex_program.Create(true, false, vertex_shader.handle); fragment_program.Create(true, false, fragment_shader.handle); @@ -508,18 +473,18 @@ void RendererOpenGL::AddTelemetryFields() { LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); - auto& telemetry_session = system.TelemetrySession(); - telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); - telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); - telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); + constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; + telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor); + telemetry_session.AddField(user_system, "GPU_Model", gpu_model); + telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version); } void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info, - program_manager, state_tracker); + rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device, + screen_info, program_manager, state_tracker); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, @@ -535,12 +500,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, GLint internal_format; switch (framebuffer.pixel_format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: internal_format = GL_RGBA8; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; break; - case Tegra::FramebufferConfig::PixelFormat::RGB565: + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: internal_format = GL_RGB565; texture.gl_format = GL_RGB; texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 8b18d32e6..5329577fb 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -16,16 +16,25 @@ namespace Core { class System; -} +class TelemetrySession; +} // namespace Core namespace Core::Frontend { class EmuWindow; } +namespace Core::Memory { +class Memory; +} + namespace Layout { struct FramebufferLayout; } +namespace Tegra { +class GPU; +} + namespace OpenGL { /// Structure used for storing information about the textures for the Switch screen @@ -56,8 +65,10 @@ class FrameMailbox; class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, - Core::Frontend::GraphicsContext& context); + explicit RendererOpenGL(Core::TelemetrySession& telemetry_session, + Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, + Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererOpenGL() override; bool Init() override; @@ -93,12 +104,13 @@ private: bool Present(int timeout_ms); + Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; - Core::System& system; - Core::Frontend::GraphicsContext& context; - const Device device; + Core::Memory::Memory& cpu_memory; + Tegra::GPU& gpu; - StateTracker state_tracker{system}; + const Device device; + StateTracker state_tracker{gpu}; // OpenGL object IDs OGLBuffer vertex_buffer; @@ -120,7 +132,7 @@ private: std::vector<u8> gl_framebuffer_data; /// Used for transforming the framebuffer orientation - Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; + Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{}; Common::Rectangle<int> framebuffer_crop_rect; /// Frame presentation mailbox diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d1f0ea932..81a39a3b8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -40,7 +40,6 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { - const auto& clip = regs.view_volume_clip_control; const std::array enabled_lut = {regs.polygon_offset_point_enable, regs.polygon_offset_line_enable, regs.polygon_offset_fill_enable}; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index d7f1ae89f..f8c77f4fa 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -117,90 +117,101 @@ struct FormatTuple { VkFormat format; ///< Vulkan format int usage = 0; ///< Describes image format usage } constexpr tex_format_tuples[] = { - {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U - {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S - {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI - {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U - {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U - {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle) - {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U - {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI - {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F - {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U - {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S - {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI - {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F - {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI - {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1 - {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23 - {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45 - {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1 - {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM - {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM - {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U - {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 - {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 - {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 - {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 - {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F - {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F - {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F - {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F - {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U - {VK_FORMAT_UNDEFINED}, // R16S - {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI - {VK_FORMAT_UNDEFINED}, // R16I - {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 - {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F - {VK_FORMAT_UNDEFINED}, // RG16UI - {VK_FORMAT_UNDEFINED}, // RG16I - {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S - {VK_FORMAT_UNDEFINED}, // RGB32F - {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB - {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U - {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S - {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // RG8UI - {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI - {VK_FORMAT_UNDEFINED}, // RGBX16F - {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI - {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I - {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 - {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB - {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB - {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB - {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB - {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB - {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U - {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB - {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB - {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB - {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB - {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5 - {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB - {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8 - {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB - {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6 - {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB - {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10 - {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB - {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12 - {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB - {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6 - {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB - {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5 - {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB - {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F + {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // A8B8G8R8_UNORM + {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // A8B8G8R8_SNORM + {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT + {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT + {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM + {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM + {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM + {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) + {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM + {VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM + {VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT + {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8_UINT + {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT + {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // R16G16B16A16_UNORM + {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // R16G16B16A16_SNORM + {VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage}, // R16G16B16A16_SINT + {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // R16G16B16A16_UINT + {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT + {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // R32G32B32A32_UINT + {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // BC1_RGBA_UNORM + {VK_FORMAT_BC2_UNORM_BLOCK}, // BC2_UNORM + {VK_FORMAT_BC3_UNORM_BLOCK}, // BC3_UNORM + {VK_FORMAT_BC4_UNORM_BLOCK}, // BC4_UNORM + {VK_FORMAT_BC4_SNORM_BLOCK}, // BC4_SNORM + {VK_FORMAT_BC5_UNORM_BLOCK}, // BC5_UNORM + {VK_FORMAT_BC5_SNORM_BLOCK}, // BC5_SNORM + {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7_UNORM + {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT + {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT + {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM + {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM + {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT + {VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT + {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT + {VK_FORMAT_R32G32_SINT, Attachable | Storage}, // R32G32_SINT + {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT + {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT + {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM + {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT + {VK_FORMAT_UNDEFINED}, // R16_SINT + {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM + {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT + {VK_FORMAT_UNDEFINED}, // R16G16_UINT + {VK_FORMAT_UNDEFINED}, // R16G16_SINT + {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM + {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT + {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB + {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM + {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM + {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT + {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT + {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT + {VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT + {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT + {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT + {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM + {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB + {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB + {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB + {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB + {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB + {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM + {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB + {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB + {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB + {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB + {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5_UNORM + {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB + {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8_UNORM + {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB + {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM + {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB + {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM + {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB + {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM + {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB + {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM + {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB + {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM + {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT // Depth formats - {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F - {VK_FORMAT_D16_UNORM, Attachable}, // Z16 + {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT + {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM // DepthStencil formats - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8 - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated) - {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) + {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT }; static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); @@ -221,7 +232,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } - // Use ABGR8 on hardware that doesn't support ASTC natively + // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp index 435c8c1b8..5b01020ec 100644 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp +++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp @@ -65,10 +65,10 @@ bool NsightAftermathTracker::Initialize() { return false; } - dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash"; + dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; - (void)FileUtil::DeleteDirRecursively(dump_dir); - if (!FileUtil::CreateDir(dump_dir)) { + (void)Common::FS::DeleteDirRecursively(dump_dir); + if (!Common::FS::CreateDir(dump_dir)) { LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); return false; } @@ -106,7 +106,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { return; } - FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); + Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; @@ -156,12 +156,12 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, }(); std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size); - if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { + if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { LOG_ERROR(Render_Vulkan, "Failed to write dump file"); return; } const std::string_view json_view(json.data(), json.size()); - if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { + if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { LOG_ERROR(Render_Vulkan, "Failed to write JSON"); return; } @@ -180,7 +180,7 @@ void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_ const std::string path = fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]); - FileUtil::IOFile file(path, "wb"); + Common::FS::IOFile file(path, "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_Vulkan, "Failed to create file {}", path); return; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2258479f5..0e4583986 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -78,7 +78,7 @@ Common::DynamicLibrary OpenVulkanLibrary() { if (!libvulkan_env || !library.Open(libvulkan_env)) { // Use the libvulkan.dylib from the application bundle. const std::string filename = - FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; library.Open(filename.c_str()); } #else @@ -86,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() { if (!library.Open(filename.c_str())) { // Android devices may not have libvulkan.so.1, only libvulkan.so. filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); - library.Open(filename.c_str()); + (void)library.Open(filename.c_str()); } #endif return library; @@ -237,8 +237,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext } // Anonymous namespace -RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system) - : RendererBase(window), system{system} {} +RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, + Core::Frontend::EmuWindow& emu_window, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context) + : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_}, + cpu_memory{cpu_memory_}, gpu{gpu_} {} RendererVulkan::~RendererVulkan() { ShutDown(); @@ -302,15 +306,15 @@ bool RendererVulkan::Init() { swapchain = std::make_unique<VKSwapchain>(*surface, *device); swapchain->Create(framebuffer.width, framebuffer.height, false); - state_tracker = std::make_unique<StateTracker>(system); + state_tracker = std::make_unique<StateTracker>(gpu); scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker); - rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, - *resource_manager, *memory_manager, - *state_tracker, *scheduler); + rasterizer = std::make_unique<RasterizerVulkan>( + render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, *device, + *resource_manager, *memory_manager, *state_tracker, *scheduler); - blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, + blit_screen = std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, *resource_manager, *memory_manager, *swapchain, *scheduler, screen_info); @@ -438,8 +442,7 @@ void RendererVulkan::Report() const { LOG_INFO(Render_Vulkan, "Device: {}", model_name); LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version); - auto& telemetry_session = system.TelemetrySession(); - constexpr auto field = Telemetry::FieldType::UserSystem; + static constexpr auto field = Common::Telemetry::FieldType::UserSystem; telemetry_session.AddField(field, "GPU_Vendor", vendor_name); telemetry_session.AddField(field, "GPU_Model", model_name); telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 522b5bff8..ddff77942 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -14,7 +14,15 @@ #include "video_core/renderer_vulkan/wrapper.h" namespace Core { -class System; +class TelemetrySession; +} + +namespace Core::Memory { +class Memory; +} + +namespace Tegra { +class GPU; } namespace Vulkan { @@ -38,7 +46,10 @@ struct VKScreenInfo { class RendererVulkan final : public VideoCore::RendererBase { public: - explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); + explicit RendererVulkan(Core::TelemetrySession& telemtry_session, + Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, + Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererVulkan() override; bool Init() override; @@ -57,7 +68,9 @@ private: void Report() const; - Core::System& system; + Core::TelemetrySession& telemetry_session; + Core::Memory::Memory& cpu_memory; + Tegra::GPU& gpu; Common::DynamicLibrary library; vk::InstanceDispatch dld; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index fbd406f2b..2bea7b24d 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -141,24 +141,28 @@ struct ScreenRectVertex { std::array<f32, 2> tex_coord; static VkVertexInputBindingDescription GetDescription() { - VkVertexInputBindingDescription description; - description.binding = 0; - description.stride = sizeof(ScreenRectVertex); - description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; - return description; + return { + .binding = 0, + .stride = sizeof(ScreenRectVertex), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; } static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() { - std::array<VkVertexInputAttributeDescription, 2> attributes; - attributes[0].location = 0; - attributes[0].binding = 0; - attributes[0].format = VK_FORMAT_R32G32_SFLOAT; - attributes[0].offset = offsetof(ScreenRectVertex, position); - attributes[1].location = 1; - attributes[1].binding = 0; - attributes[1].format = VK_FORMAT_R32G32_SFLOAT; - attributes[1].offset = offsetof(ScreenRectVertex, tex_coord); - return attributes; + return {{ + { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(ScreenRectVertex, position), + }, + { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(ScreenRectVertex, tex_coord), + }, + }}; } }; @@ -183,9 +187,9 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - case Tegra::FramebufferConfig::PixelFormat::RGB565: + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: return VK_FORMAT_R5G6B5_UNORM_PACK16; default: UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", @@ -206,14 +210,16 @@ struct VKBlitScreen::BufferData { // Unaligned image data goes here }; -VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKSwapchain& swapchain, VKScheduler& scheduler, - const VKScreenInfo& screen_info) - : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain}, - scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} { +VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, + Core::Frontend::EmuWindow& render_window_, + VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_, + VKResourceManager& resource_manager_, VKMemoryManager& memory_manager_, + VKSwapchain& swapchain_, VKScheduler& scheduler_, + const VKScreenInfo& screen_info_) + : cpu_memory{cpu_memory_}, render_window{render_window_}, + rasterizer{rasterizer_}, device{device_}, resource_manager{resource_manager_}, + memory_manager{memory_manager_}, swapchain{swapchain_}, scheduler{scheduler_}, + image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { watches.resize(image_count); std::generate(watches.begin(), watches.end(), []() { return std::make_unique<VKFenceWatch>(); }); @@ -255,7 +261,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon const auto pixel_format = VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const auto host_ptr = system.Memory().GetPointer(framebuffer_addr); + const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); // TODO(Rodrigo): Read this from HLE @@ -267,20 +273,25 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkBufferImageCopy copy; - copy.bufferOffset = image_offset; - copy.bufferRowLength = 0; - copy.bufferImageHeight = 0; - copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - copy.imageSubresource.mipLevel = 0; - copy.imageSubresource.baseArrayLayer = 0; - copy.imageSubresource.layerCount = 1; - copy.imageOffset.x = 0; - copy.imageOffset.y = 0; - copy.imageOffset.z = 0; - copy.imageExtent.width = framebuffer.width; - copy.imageExtent.height = framebuffer.height; - copy.imageExtent.depth = 1; + const VkBufferImageCopy copy{ + .bufferOffset = image_offset, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + }; scheduler.Record( [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); @@ -295,11 +306,9 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { - VkClearValue clear_color; - clear_color.color.float32[0] = 0.0f; - clear_color.color.float32[1] = 0.0f; - clear_color.color.float32[2] = 0.0f; - clear_color.color.float32[3] = 0.0f; + const VkClearValue clear_color{ + .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, + }; VkRenderPassBeginInfo renderpass_bi; renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; @@ -379,93 +388,109 @@ void VKBlitScreen::CreateSemaphores() { } void VKBlitScreen::CreateDescriptorPool() { - std::array<VkDescriptorPoolSize, 2> pool_sizes; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - pool_sizes[0].descriptorCount = static_cast<u32>(image_count); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[1].descriptorCount = static_cast<u32>(image_count); - - VkDescriptorPoolCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - ci.maxSets = static_cast<u32>(image_count); - ci.poolSizeCount = static_cast<u32>(pool_sizes.size()); - ci.pPoolSizes = pool_sizes.data(); + const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ + { + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = static_cast<u32>(image_count), + }, + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = static_cast<u32>(image_count), + }, + }}; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = static_cast<u32>(image_count), + .poolSizeCount = static_cast<u32>(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); } void VKBlitScreen::CreateRenderPass() { - VkAttachmentDescription color_attachment; - color_attachment.flags = 0; - color_attachment.format = swapchain.GetImageFormat(); - color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; - color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - - VkAttachmentReference color_attachment_ref; - color_attachment_ref.attachment = 0; - color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = 1; - subpass_description.pColorAttachments = &color_attachment_ref; - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; - - VkSubpassDependency dependency; - dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - dependency.dstSubpass = 0; - dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency.srcAccessMask = 0; - dependency.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - dependency.dependencyFlags = 0; - - VkRenderPassCreateInfo renderpass_ci; - renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - renderpass_ci.pNext = nullptr; - renderpass_ci.flags = 0; - renderpass_ci.attachmentCount = 1; - renderpass_ci.pAttachments = &color_attachment; - renderpass_ci.subpassCount = 1; - renderpass_ci.pSubpasses = &subpass_description; - renderpass_ci.dependencyCount = 1; - renderpass_ci.pDependencies = &dependency; + const VkAttachmentDescription color_attachment{ + .flags = 0, + .format = swapchain.GetImageFormat(), + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + }; + + const VkAttachmentReference color_attachment_ref{ + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = 1, + .pColorAttachments = &color_attachment_ref, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + + const VkSubpassDependency dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dependencyFlags = 0, + }; + + const VkRenderPassCreateInfo renderpass_ci{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &dependency, + }; renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); } void VKBlitScreen::CreateDescriptorSetLayout() { - std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings; - layout_bindings[0].binding = 0; - layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - layout_bindings[0].descriptorCount = 1; - layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - layout_bindings[0].pImmutableSamplers = nullptr; - layout_bindings[1].binding = 1; - layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - layout_bindings[1].descriptorCount = 1; - layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - layout_bindings[1].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = static_cast<u32>(layout_bindings.size()); - ci.pBindings = layout_bindings.data(); + const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = nullptr, + }, + }}; + + const VkDescriptorSetLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(layout_bindings.size()), + .pBindings = layout_bindings.data(), + }; descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); } @@ -473,175 +498,192 @@ void VKBlitScreen::CreateDescriptorSetLayout() { void VKBlitScreen::CreateDescriptorSets() { const std::vector layouts(image_count, *descriptor_set_layout); - VkDescriptorSetAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.descriptorPool = *descriptor_pool; - ai.descriptorSetCount = static_cast<u32>(image_count); - ai.pSetLayouts = layouts.data(); + const VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *descriptor_pool, + .descriptorSetCount = static_cast<u32>(image_count), + .pSetLayouts = layouts.data(), + }; + descriptor_sets = descriptor_pool.Allocate(ai); } void VKBlitScreen::CreatePipelineLayout() { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; + const VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); } void VKBlitScreen::CreateGraphicsPipeline() { - std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages; - shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - shader_stages[0].pNext = nullptr; - shader_stages[0].flags = 0; - shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - shader_stages[0].module = *vertex_shader; - shader_stages[0].pName = "main"; - shader_stages[0].pSpecializationInfo = nullptr; - shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - shader_stages[1].pNext = nullptr; - shader_stages[1].flags = 0; - shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - shader_stages[1].module = *fragment_shader; - shader_stages[1].pName = "main"; - shader_stages[1].pSpecializationInfo = nullptr; + const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; const auto vertex_binding_description = ScreenRectVertex::GetDescription(); const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - VkPipelineVertexInputStateCreateInfo vertex_input_ci; - vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_input_ci.pNext = nullptr; - vertex_input_ci.flags = 0; - vertex_input_ci.vertexBindingDescriptionCount = 1; - vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description; - vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}; - vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data(); - - VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; - input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_assembly_ci.pNext = nullptr; - input_assembly_ci.flags = 0; - input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - input_assembly_ci.primitiveRestartEnable = VK_FALSE; - - VkPipelineViewportStateCreateInfo viewport_state_ci; - viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_state_ci.pNext = nullptr; - viewport_state_ci.flags = 0; - viewport_state_ci.viewportCount = 1; - viewport_state_ci.pViewports = nullptr; - viewport_state_ci.scissorCount = 1; - viewport_state_ci.pScissors = nullptr; - - VkPipelineRasterizationStateCreateInfo rasterization_ci; - rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_ci.pNext = nullptr; - rasterization_ci.flags = 0; - rasterization_ci.depthClampEnable = VK_FALSE; - rasterization_ci.rasterizerDiscardEnable = VK_FALSE; - rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_ci.cullMode = VK_CULL_MODE_NONE; - rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE; - rasterization_ci.depthBiasEnable = VK_FALSE; - rasterization_ci.depthBiasConstantFactor = 0.0f; - rasterization_ci.depthBiasClamp = 0.0f; - rasterization_ci.depthBiasSlopeFactor = 0.0f; - rasterization_ci.lineWidth = 1.0f; - - VkPipelineMultisampleStateCreateInfo multisampling_ci; - multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisampling_ci.pNext = nullptr; - multisampling_ci.flags = 0; - multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisampling_ci.sampleShadingEnable = VK_FALSE; - multisampling_ci.minSampleShading = 0.0f; - multisampling_ci.pSampleMask = nullptr; - multisampling_ci.alphaToCoverageEnable = VK_FALSE; - multisampling_ci.alphaToOneEnable = VK_FALSE; - - VkPipelineColorBlendAttachmentState color_blend_attachment; - color_blend_attachment.blendEnable = VK_FALSE; - color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD; - color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD; - color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; - - VkPipelineColorBlendStateCreateInfo color_blend_ci; - color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - color_blend_ci.flags = 0; - color_blend_ci.pNext = nullptr; - color_blend_ci.logicOpEnable = VK_FALSE; - color_blend_ci.logicOp = VK_LOGIC_OP_COPY; - color_blend_ci.attachmentCount = 1; - color_blend_ci.pAttachments = &color_blend_attachment; - color_blend_ci.blendConstants[0] = 0.0f; - color_blend_ci.blendConstants[1] = 0.0f; - color_blend_ci.blendConstants[2] = 0.0f; - color_blend_ci.blendConstants[3] = 0.0f; - - static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR}; - VkPipelineDynamicStateCreateInfo dynamic_state_ci; - dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_ci.pNext = nullptr; - dynamic_state_ci.flags = 0; - dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); - dynamic_state_ci.pDynamicStates = dynamic_states.data(); - - VkGraphicsPipelineCreateInfo pipeline_ci; - pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipeline_ci.pNext = nullptr; - pipeline_ci.flags = 0; - pipeline_ci.stageCount = static_cast<u32>(shader_stages.size()); - pipeline_ci.pStages = shader_stages.data(); - pipeline_ci.pVertexInputState = &vertex_input_ci; - pipeline_ci.pInputAssemblyState = &input_assembly_ci; - pipeline_ci.pTessellationState = nullptr; - pipeline_ci.pViewportState = &viewport_state_ci; - pipeline_ci.pRasterizationState = &rasterization_ci; - pipeline_ci.pMultisampleState = &multisampling_ci; - pipeline_ci.pDepthStencilState = nullptr; - pipeline_ci.pColorBlendState = &color_blend_ci; - pipeline_ci.pDynamicState = &dynamic_state_ci; - pipeline_ci.layout = *pipeline_layout; - pipeline_ci.renderPass = *renderpass; - pipeline_ci.subpass = 0; - pipeline_ci.basePipelineHandle = 0; - pipeline_ci.basePipelineIndex = 0; + const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &vertex_binding_description, + .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, + .pVertexAttributeDescriptions = vertex_attrs_description.data(), + }; + + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = VK_FALSE, + }; + + const VkPipelineViewportStateCreateInfo viewport_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, + }; + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + + const VkPipelineMultisampleStateCreateInfo multisampling_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + + const VkPipelineColorBlendAttachmentState color_blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + + static constexpr std::array dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast<u32>(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const VkGraphicsPipelineCreateInfo pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); } void VKBlitScreen::CreateSampler() { - VkSamplerCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.magFilter = VK_FILTER_LINEAR; - ci.minFilter = VK_FILTER_NEAREST; - ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - ci.mipLodBias = 0.0f; - ci.anisotropyEnable = VK_FALSE; - ci.maxAnisotropy = 0.0f; - ci.compareEnable = VK_FALSE; - ci.compareOp = VK_COMPARE_OP_NEVER; - ci.minLod = 0.0f; - ci.maxLod = 0.0f; - ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - ci.unnormalizedCoordinates = VK_FALSE; + const VkSamplerCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; sampler = device.GetLogical().CreateSampler(ci); } @@ -650,15 +692,17 @@ void VKBlitScreen::CreateFramebuffers() { const VkExtent2D size{swapchain.GetSize()}; framebuffers.resize(image_count); - VkFramebufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.renderPass = *renderpass; - ci.attachmentCount = 1; - ci.width = size.width; - ci.height = size.height; - ci.layers = 1; + VkFramebufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = *renderpass, + .attachmentCount = 1, + .pAttachments = nullptr, + .width = size.width, + .height = size.height, + .layers = 1, + }; for (std::size_t i = 0; i < image_count; ++i) { const VkImageView image_view{swapchain.GetImageViewIndex(i)}; @@ -678,16 +722,17 @@ void VKBlitScreen::ReleaseRawImages() { } void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = CalculateBufferSize(framebuffer); - ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = CalculateBufferSize(framebuffer), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; buffer = device.GetLogical().CreateBuffer(ci); buffer_commit = memory_manager.Commit(buffer, true); @@ -697,24 +742,28 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) raw_images.resize(image_count); raw_buffer_commits.resize(image_count); - VkImageCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.imageType = VK_IMAGE_TYPE_2D; - ci.format = GetFormat(framebuffer); - ci.extent.width = framebuffer.width; - ci.extent.height = framebuffer.height; - ci.extent.depth = 1; - ci.mipLevels = 1; - ci.arrayLayers = 1; - ci.samples = VK_SAMPLE_COUNT_1_BIT; - ci.tiling = VK_IMAGE_TILING_LINEAR; - ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + const VkImageCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = GetFormat(framebuffer), + .extent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; for (std::size_t i = 0; i < image_count; ++i) { raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); @@ -723,39 +772,43 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) } void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { - VkDescriptorBufferInfo buffer_info; - buffer_info.buffer = *buffer; - buffer_info.offset = offsetof(BufferData, uniform); - buffer_info.range = sizeof(BufferData::uniform); - - VkWriteDescriptorSet ubo_write; - ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - ubo_write.pNext = nullptr; - ubo_write.dstSet = descriptor_sets[image_index]; - ubo_write.dstBinding = 0; - ubo_write.dstArrayElement = 0; - ubo_write.descriptorCount = 1; - ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - ubo_write.pImageInfo = nullptr; - ubo_write.pBufferInfo = &buffer_info; - ubo_write.pTexelBufferView = nullptr; - - VkDescriptorImageInfo image_info; - image_info.sampler = *sampler; - image_info.imageView = image_view; - image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - VkWriteDescriptorSet sampler_write; - sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - sampler_write.pNext = nullptr; - sampler_write.dstSet = descriptor_sets[image_index]; - sampler_write.dstBinding = 1; - sampler_write.dstArrayElement = 0; - sampler_write.descriptorCount = 1; - sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - sampler_write.pImageInfo = &image_info; - sampler_write.pBufferInfo = nullptr; - sampler_write.pTexelBufferView = nullptr; + const VkDescriptorBufferInfo buffer_info{ + .buffer = *buffer, + .offset = offsetof(BufferData, uniform), + .range = sizeof(BufferData::uniform), + }; + + const VkWriteDescriptorSet ubo_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pImageInfo = nullptr, + .pBufferInfo = &buffer_info, + .pTexelBufferView = nullptr, + }; + + const VkDescriptorImageInfo image_info{ + .sampler = *sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + }; + + const VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 243640fab..838d38f69 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -15,6 +15,10 @@ namespace Core { class System; } +namespace Core::Memory { +class Memory; +} + namespace Core::Frontend { class EmuWindow; } @@ -39,7 +43,8 @@ class VKSwapchain; class VKBlitScreen final { public: - explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, + explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, + Core::Frontend::EmuWindow& render_window, VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKSwapchain& swapchain, VKScheduler& scheduler, @@ -81,7 +86,7 @@ private: u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, std::size_t image_index) const; - Core::System& system; + Core::Memory::Memory& cpu_memory; Core::Frontend::EmuWindow& render_window; VideoCore::RasterizerInterface& rasterizer; const VKDevice& device; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2be38d419..d9d3da9ea 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -39,16 +39,17 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(size); - ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; + : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(size), + .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; buffer.handle = device.GetLogical().CreateBuffer(ci); buffer.commit = memory_manager.Commit(buffer.handle, false); @@ -66,16 +67,17 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, barrier, {}); }); @@ -87,16 +89,17 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { const VkBuffer handle = Handle(); scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | @@ -142,14 +145,15 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst }); } -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, - device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ - staging_pool} {} +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) + : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory, + CreateStreamBuffer(device_, + scheduler_)}, + device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} {} VKBufferCache::~VKBufferCache() = default; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 991ee451c..7fb5ceedf 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -13,10 +13,6 @@ #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/wrapper.h" -namespace Core { -class System; -} - namespace Vulkan { class VKDevice; @@ -53,7 +49,8 @@ private: class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { public: - explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index da71e710c..182461ed9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -115,32 +115,32 @@ constexpr u8 quad_array[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { - VkDescriptorSetLayoutBinding binding; - binding.binding = 0; - binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = nullptr; - return binding; + return { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; } VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - VkPushConstantRange range; - range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - range.offset = 0; - range.size = static_cast<u32>(size); - return range; + return { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast<u32>(size), + }; } // Uint8 SPIR-V module. Generated from the "shaders/" directory. @@ -344,29 +344,33 @@ constexpr u8 QUAD_INDEXED_SPV[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { - std::array<VkDescriptorSetLayoutBinding, 2> bindings; - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[0].pImmutableSamplers = nullptr; - bindings[1].binding = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[1].pImmutableSamplers = nullptr; - return bindings; + return {{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + }}; } VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 2; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } } // Anonymous namespace @@ -376,37 +380,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, const u8* code) { - VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; - descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_layout_ci.pNext = nullptr; - descriptor_layout_ci.flags = 0; - descriptor_layout_ci.bindingCount = bindings.size(); - descriptor_layout_ci.pBindings = bindings.data(); - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); - - VkPipelineLayoutCreateInfo pipeline_layout_ci; - pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_ci.pNext = nullptr; - pipeline_layout_ci.flags = 0; - pipeline_layout_ci.setLayoutCount = 1; - pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); - pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); - pipeline_layout_ci.pPushConstantRanges = push_constants.data(); - layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }); + + layout = device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), + }); if (!templates.empty()) { - VkDescriptorUpdateTemplateCreateInfoKHR template_ci; - template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - template_ci.pNext = nullptr; - template_ci.flags = 0; - template_ci.descriptorUpdateEntryCount = templates.size(); - template_ci.pDescriptorUpdateEntries = templates.data(); - template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - template_ci.descriptorSetLayout = *descriptor_set_layout; - template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - template_ci.pipelineLayout = *layout; - template_ci.set = 0; - descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); + descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = templates.size(), + .pDescriptorUpdateEntries = templates.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = 0, + }); descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } @@ -414,32 +418,32 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); std::memcpy(code_copy.get(), code, code_size); - VkShaderModuleCreateInfo module_ci; - module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - module_ci.pNext = nullptr; - module_ci.flags = 0; - module_ci.codeSize = code_size; - module_ci.pCode = code_copy.get(); - module = device.GetLogical().CreateShaderModule(module_ci); - - VkComputePipelineCreateInfo pipeline_ci; - pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline_ci.pNext = nullptr; - pipeline_ci.flags = 0; - pipeline_ci.layout = *layout; - pipeline_ci.basePipelineHandle = nullptr; - pipeline_ci.basePipelineIndex = 0; - - VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + module = device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = code_copy.get(), + }); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 281bf9ac3..ed9d2991c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -43,12 +43,13 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { // TODO(Rodrigo): Maybe make individual bindings here? for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = 1; - entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); } }; add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); @@ -58,25 +59,25 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = static_cast<u32>(bindings.size()); - ci.pBindings = bindings.data(); - return device.GetLogical().CreateDescriptorSetLayout(ci); + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); } vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; - return device.GetLogical().CreatePipelineLayout(ci); + return device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); } vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { @@ -89,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat return {}; } - VkDescriptorUpdateTemplateCreateInfoKHR ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - ci.pNext = nullptr; - ci.flags = 0; - ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); - ci.pDescriptorUpdateEntries = template_entries.data(); - ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - ci.descriptorSetLayout = *descriptor_set_layout; - ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - ci.pipelineLayout = *layout; - ci.set = DESCRIPTOR_SET; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), + .pDescriptorUpdateEntries = template_entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = DESCRIPTOR_SET, + }); } vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { device.SaveShader(code); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code.size() * sizeof(u32); - ci.pCode = code.data(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code.size() * sizeof(u32), + .pCode = code.data(), + }); } vk::Pipeline VKComputePipeline::CreatePipeline() const { - VkComputePipelineCreateInfo ci; - VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *shader_module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; - subgroup_size_ci.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - subgroup_size_ci.pNext = nullptr; - subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + + VkComputePipelineCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }; + + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - stage_ci.pNext = &subgroup_size_ci; + ci.stage.pNext = &subgroup_size_ci; } - ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.layout = *layout; - ci.basePipelineHandle = nullptr; - ci.basePipelineIndex = 0; return device.GetLogical().CreateComputePipeline(ci); } diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 9259b618d..ac4a0884e 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -43,27 +43,30 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; - - VkDescriptorPoolCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - ci.maxSets = num_sets; - ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); - ci.pPoolSizes = std::data(pool_sizes); + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, + }; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = num_sets, + .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), + .pPoolSizes = std::data(pool_sizes), + }; return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); } vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count) { const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.descriptorPool = **active_pool; - ai.descriptorSetCount = static_cast<u32>(count); - ai.pSetLayouts = layout_copies.data(); + VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = **active_pool, + .descriptorSetCount = static_cast<u32>(count), + .pSetLayouts = layout_copies.data(), + }; vk::DescriptorSets sets = active_pool->Allocate(ai); if (!sets.IsOutOfPoolMemory()) { diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index fdaea4210..4205bd573 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -22,14 +22,21 @@ namespace { namespace Alternatives { -constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT, - VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}}; -constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}}; +constexpr std::array Depth24UnormS8_UINT{ + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_D16_UNORM_S8_UINT, + VkFormat{}, +}; + +constexpr std::array Depth16UnormS8_UINT{ + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VkFormat{}, +}; } // namespace Alternatives -constexpr std::array REQUIRED_EXTENSIONS = { +constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_16BIT_STORAGE_EXTENSION_NAME, VK_KHR_8BIT_STORAGE_EXTENSION_NAME, @@ -77,14 +84,19 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_FORMAT_A8B8G8R8_UINT_PACK32, VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SINT_PACK32, VK_FORMAT_A8B8G8R8_SRGB_PACK32, VK_FORMAT_B5G6R5_UNORM_PACK16, VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A2B10G10R10_UINT_PACK32, VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UNORM, @@ -96,8 +108,11 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SNORM, + VK_FORMAT_R8_SINT, VK_FORMAT_R8_UINT, VK_FORMAT_B10G11R11_UFLOAT_PACK32, VK_FORMAT_R32_SFLOAT, @@ -117,6 +132,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK, @@ -169,97 +185,104 @@ bool VKDevice::Create() { const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(); - VkPhysicalDeviceFeatures2 features2; - features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - features2.pNext = nullptr; + VkPhysicalDeviceFeatures2 features2{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = nullptr, + }; const void* first_next = &features2; void** next = &features2.pNext; - auto& features = features2.features; - features.robustBufferAccess = false; - features.fullDrawIndexUint32 = false; - features.imageCubeArray = false; - features.independentBlend = true; - features.geometryShader = true; - features.tessellationShader = true; - features.sampleRateShading = false; - features.dualSrcBlend = false; - features.logicOp = false; - features.multiDrawIndirect = false; - features.drawIndirectFirstInstance = false; - features.depthClamp = true; - features.depthBiasClamp = true; - features.fillModeNonSolid = false; - features.depthBounds = false; - features.wideLines = false; - features.largePoints = true; - features.alphaToOne = false; - features.multiViewport = true; - features.samplerAnisotropy = true; - features.textureCompressionETC2 = false; - features.textureCompressionASTC_LDR = is_optimal_astc_supported; - features.textureCompressionBC = false; - features.occlusionQueryPrecise = true; - features.pipelineStatisticsQuery = false; - features.vertexPipelineStoresAndAtomics = true; - features.fragmentStoresAndAtomics = true; - features.shaderTessellationAndGeometryPointSize = false; - features.shaderImageGatherExtended = true; - features.shaderStorageImageExtendedFormats = false; - features.shaderStorageImageMultisample = false; - features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; - features.shaderStorageImageWriteWithoutFormat = true; - features.shaderUniformBufferArrayDynamicIndexing = false; - features.shaderSampledImageArrayDynamicIndexing = false; - features.shaderStorageBufferArrayDynamicIndexing = false; - features.shaderStorageImageArrayDynamicIndexing = false; - features.shaderClipDistance = false; - features.shaderCullDistance = false; - features.shaderFloat64 = false; - features.shaderInt64 = false; - features.shaderInt16 = false; - features.shaderResourceResidency = false; - features.shaderResourceMinLod = false; - features.sparseBinding = false; - features.sparseResidencyBuffer = false; - features.sparseResidencyImage2D = false; - features.sparseResidencyImage3D = false; - features.sparseResidency2Samples = false; - features.sparseResidency4Samples = false; - features.sparseResidency8Samples = false; - features.sparseResidency16Samples = false; - features.sparseResidencyAliased = false; - features.variableMultisampleRate = false; - features.inheritedQueries = false; - - VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage; - bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; - bit16_storage.pNext = nullptr; - bit16_storage.storageBuffer16BitAccess = false; - bit16_storage.uniformAndStorageBuffer16BitAccess = true; - bit16_storage.storagePushConstant16 = false; - bit16_storage.storageInputOutput16 = false; + features2.features = { + .robustBufferAccess = false, + .fullDrawIndexUint32 = false, + .imageCubeArray = false, + .independentBlend = true, + .geometryShader = true, + .tessellationShader = true, + .sampleRateShading = false, + .dualSrcBlend = false, + .logicOp = false, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = true, + .depthBiasClamp = true, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = false, + .largePoints = true, + .alphaToOne = false, + .multiViewport = true, + .samplerAnisotropy = true, + .textureCompressionETC2 = false, + .textureCompressionASTC_LDR = is_optimal_astc_supported, + .textureCompressionBC = false, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = false, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, + .shaderStorageImageWriteWithoutFormat = true, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + .sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; + + VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, + .pNext = nullptr, + .storageBuffer16BitAccess = false, + .uniformAndStorageBuffer16BitAccess = true, + .storagePushConstant16 = false, + .storageInputOutput16 = false, + }; SetNext(next, bit16_storage); - VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage; - bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR; - bit8_storage.pNext = nullptr; - bit8_storage.storageBuffer8BitAccess = false; - bit8_storage.uniformAndStorageBuffer8BitAccess = true; - bit8_storage.storagePushConstant8 = false; + VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR, + .pNext = nullptr, + .storageBuffer8BitAccess = false, + .uniformAndStorageBuffer8BitAccess = true, + .storagePushConstant8 = false, + }; SetNext(next, bit8_storage); - VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; - host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT; - host_query_reset.hostQueryReset = true; + VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT, + .hostQueryReset = true, + }; SetNext(next, host_query_reset); VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { - float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; - float16_int8.pNext = nullptr; - float16_int8.shaderFloat16 = true; - float16_int8.shaderInt8 = false; + float16_int8 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR, + .pNext = nullptr, + .shaderFloat16 = true, + .shaderInt8 = false, + }; SetNext(next, float16_int8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); @@ -271,10 +294,11 @@ bool VKDevice::Create() { VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { - std430_layout.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR; - std430_layout.pNext = nullptr; - std430_layout.uniformBufferStandardLayout = true; + std430_layout = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .uniformBufferStandardLayout = true, + }; SetNext(next, std430_layout); } else { LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); @@ -282,9 +306,11 @@ bool VKDevice::Create() { VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; if (ext_index_type_uint8) { - index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT; - index_type_uint8.pNext = nullptr; - index_type_uint8.indexTypeUint8 = true; + index_type_uint8 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, + .pNext = nullptr, + .indexTypeUint8 = true, + }; SetNext(next, index_type_uint8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); @@ -292,11 +318,12 @@ bool VKDevice::Create() { VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; if (ext_transform_feedback) { - transform_feedback.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; - transform_feedback.pNext = nullptr; - transform_feedback.transformFeedback = true; - transform_feedback.geometryStreams = true; + transform_feedback = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, + .pNext = nullptr, + .transformFeedback = true, + .geometryStreams = true, + }; SetNext(next, transform_feedback); } else { LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); @@ -304,10 +331,12 @@ bool VKDevice::Create() { VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border; if (ext_custom_border_color) { - custom_border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; - custom_border.pNext = nullptr; - custom_border.customBorderColors = VK_TRUE; - custom_border.customBorderColorWithoutFormat = VK_TRUE; + custom_border = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, + .pNext = nullptr, + .customBorderColors = VK_TRUE, + .customBorderColorWithoutFormat = VK_TRUE, + }; SetNext(next, custom_border); } else { LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); @@ -315,9 +344,11 @@ bool VKDevice::Create() { VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; if (ext_extended_dynamic_state) { - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - dynamic_state.extendedDynamicState = VK_TRUE; + dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .extendedDynamicState = VK_TRUE, + }; SetNext(next, dynamic_state); } else { LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); @@ -331,11 +362,13 @@ bool VKDevice::Create() { if (nv_device_diagnostics_config) { nsight_aftermath_tracker.Initialize(); - diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV; - diagnostics_nv.pNext = &features2; - diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | - VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | - VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV; + diagnostics_nv = { + .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, + .pNext = &features2, + .flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV, + }; first_next = &diagnostics_nv; } @@ -347,8 +380,18 @@ bool VKDevice::Create() { CollectTelemetryParameters(); + if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR) { + // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but the <stride> field + // seems to be bugged. Blacklisting it for now. + LOG_WARNING(Render_Vulkan, + "Blacklisting AMD proprietary from VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } + graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); + + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); return true; } @@ -704,13 +747,15 @@ void VKDevice::SetupFeatures() { } void VKDevice::CollectTelemetryParameters() { - VkPhysicalDeviceDriverPropertiesKHR driver; - driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR; - driver.pNext = nullptr; + VkPhysicalDeviceDriverPropertiesKHR driver{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, + .pNext = nullptr, + }; - VkPhysicalDeviceProperties2KHR properties; - properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - properties.pNext = &driver; + VkPhysicalDeviceProperties2KHR properties{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, + .pNext = &driver, + }; physical.GetProperties2KHR(properties); driver_id = driver.driverID; @@ -719,23 +764,26 @@ void VKDevice::CollectTelemetryParameters() { const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); reported_extensions.reserve(std::size(extensions)); for (const auto& extension : extensions) { - reported_extensions.push_back(extension.extensionName); + reported_extensions.emplace_back(extension.extensionName); } } std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { static constexpr float QUEUE_PRIORITY = 1.0f; - std::unordered_set<u32> unique_queue_families = {graphics_family, present_family}; + std::unordered_set<u32> unique_queue_families{graphics_family, present_family}; std::vector<VkDeviceQueueCreateInfo> queue_cis; + queue_cis.reserve(unique_queue_families.size()); for (const u32 queue_family : unique_queue_families) { - VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back(); - ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.queueFamilyIndex = queue_family; - ci.queueCount = 1; + auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queueFamilyIndex = queue_family, + .queueCount = 1, + .pQueuePriorities = nullptr, + }); ci.pQueuePriorities = &QUEUE_PRIORITY; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index ae5c21baa..26a233db1 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public: return properties.limits.maxPushConstantsSize; } + /// Returns the maximum size for shared memory. + u32 GetMaxComputeSharedMemorySize() const { + return properties.limits.maxComputeSharedMemorySize; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -197,6 +202,11 @@ public: return reported_extensions; } + /// Returns true if the setting for async shader compilation is enabled. + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + /// Checks if the physical device is suitable. static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); @@ -247,6 +257,9 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + // Asynchronous Graphics Pipeline setting + bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline + // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index a02be5487..55a8348fc 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -29,7 +29,7 @@ void InnerFence::Queue() { } ASSERT(!event); - event = device.GetLogical().CreateEvent(); + event = device.GetLogical().CreateNewEvent(); ticks = scheduler.Ticks(); scheduler.RequestOutsideRenderPassOperationContext(); @@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const { } } -VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKTextureCache& texture_cache, VKBufferCache& buffer_cache, - VKQueryCache& query_cache) - : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache), - device{device}, scheduler{scheduler} {} +VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, + VKBufferCache& buffer_cache, VKQueryCache& query_cache, + const VKDevice& device_, VKScheduler& scheduler_) + : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache), + device{device_}, scheduler{scheduler_} {} Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 043fe7947..1547d6d30 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -55,10 +55,10 @@ using GenericFenceManager = class VKFenceManager final : public GenericFenceManager { public: - explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKTextureCache& texture_cache, VKBufferCache& buffer_cache, - VKQueryCache& query_cache); + explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, + VKBufferCache& buffer_cache, VKQueryCache& query_cache, + const VKDevice& device, VKScheduler& scheduler); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 844445105..2e46c6278 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -28,15 +28,15 @@ namespace { template <class StencilFace> VkStencilOpState GetStencilFaceState(const StencilFace& face) { - VkStencilOpState state; - state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()); - state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()); - state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()); - state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()); - state.compareMask = 0; - state.writeMask = 0; - state.reference = 0; - return state; + return { + .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), + .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), + .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), + .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), + .compareMask = 0, + .writeMask = 0, + .reference = 0, + }; } bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { @@ -52,20 +52,21 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { } VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union { + union Swizzle { u32 raw; BitField<0, 3, Maxwell::ViewportSwizzle> x; BitField<4, 3, Maxwell::ViewportSwizzle> y; BitField<8, 3, Maxwell::ViewportSwizzle> z; BitField<12, 3, Maxwell::ViewportSwizzle> w; - } const unpacked{swizzle}; - - VkViewportSwizzleNV result; - result.x = MaxwellToVK::ViewportSwizzle(unpacked.x); - result.y = MaxwellToVK::ViewportSwizzle(unpacked.y); - result.z = MaxwellToVK::ViewportSwizzle(unpacked.z); - result.w = MaxwellToVK::ViewportSwizzle(unpacked.w); - return result; + }; + const Swizzle unpacked{swizzle}; + + return { + .x = MaxwellToVK::ViewportSwizzle(unpacked.x), + .y = MaxwellToVK::ViewportSwizzle(unpacked.y), + .z = MaxwellToVK::ViewportSwizzle(unpacked.z), + .w = MaxwellToVK::ViewportSwizzle(unpacked.w), + }; } } // Anonymous namespace @@ -77,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche const GraphicsPipelineCacheKey& key, vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program) - : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, + : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, descriptor_allocator{descriptor_pool, *descriptor_set_layout}, update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( program)}, - renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( - key.renderpass_params, - program)} {} + renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, + pipeline{CreatePipeline(cache_key.renderpass_params, program)} {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -100,24 +100,26 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( vk::Span<VkDescriptorSetLayoutBinding> bindings) const { - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = bindings.size(); - ci.pBindings = bindings.data(); + const VkDescriptorSetLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }; return device.GetLogical().CreateDescriptorSetLayout(ci); } vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; + const VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; return device.GetLogical().CreatePipelineLayout(ci); } @@ -136,26 +138,28 @@ vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTempla return {}; } - VkDescriptorUpdateTemplateCreateInfoKHR ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - ci.pNext = nullptr; - ci.flags = 0; - ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); - ci.pDescriptorUpdateEntries = template_entries.data(); - ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - ci.descriptorSetLayout = *descriptor_set_layout; - ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - ci.pipelineLayout = *layout; - ci.set = DESCRIPTOR_SET; + const VkDescriptorUpdateTemplateCreateInfoKHR ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), + .pDescriptorUpdateEntries = template_entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = DESCRIPTOR_SET, + }; return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); } std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + VkShaderModuleCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; std::vector<vk::ShaderModule> modules; modules.reserve(Maxwell::MaxShaderStage); @@ -176,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { - const auto& state = fixed_state; + const auto& state = cache_key.fixed_state; const auto& viewport_swizzles = state.viewport_swizzles; FixedPipelineState::DynamicState dynamic; @@ -204,15 +208,17 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - auto& vertex_binding = vertex_bindings.emplace_back(); - vertex_binding.binding = static_cast<u32>(index); - vertex_binding.stride = binding.stride; - vertex_binding.inputRate = rate; + vertex_bindings.push_back({ + .binding = static_cast<u32>(index), + .stride = binding.stride, + .inputRate = rate, + }); if (instanced) { - auto& binding_divisor = vertex_binding_divisors.emplace_back(); - binding_divisor.binding = static_cast<u32>(index); - binding_divisor.divisor = state.binding_divisors[index]; + vertex_binding_divisors.push_back({ + .binding = static_cast<u32>(index), + .divisor = state.binding_divisors[index], + }); } } @@ -227,116 +233,132 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa // Skip attributes not used by the vertex shaders. continue; } - auto& vertex_attribute = vertex_attributes.emplace_back(); - vertex_attribute.location = static_cast<u32>(index); - vertex_attribute.binding = attribute.buffer; - vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()); - vertex_attribute.offset = attribute.offset; + vertex_attributes.push_back({ + .location = static_cast<u32>(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - VkPipelineVertexInputStateCreateInfo vertex_input_ci; - vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_input_ci.pNext = nullptr; - vertex_input_ci.flags = 0; - vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()); - vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data(); - vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()); - vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data(); - - VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci; - input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; - input_divisor_ci.pNext = nullptr; - input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()); - input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data(); + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; - input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_assembly_ci.pNext = nullptr; - input_assembly_ci.flags = 0; - input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); - input_assembly_ci.primitiveRestartEnable = - state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); - - VkPipelineTessellationStateCreateInfo tessellation_ci; - tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; - tessellation_ci.pNext = nullptr; - tessellation_ci.flags = 0; - tessellation_ci.patchControlPoints = state.patch_control_points_minus_one.Value() + 1; - - VkPipelineViewportStateCreateInfo viewport_ci; - viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_ci.pNext = nullptr; - viewport_ci.flags = 0; - viewport_ci.viewportCount = Maxwell::NumViewports; - viewport_ci.pViewports = nullptr; - viewport_ci.scissorCount = Maxwell::NumViewports; - viewport_ci.pScissors = nullptr; + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci; - swizzle_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV; - swizzle_ci.pNext = nullptr; - swizzle_ci.flags = 0; - swizzle_ci.viewportCount = Maxwell::NumViewports; - swizzle_ci.pViewportSwizzles = swizzles.data(); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; if (device.IsNvViewportSwizzleSupported()) { viewport_ci.pNext = &swizzle_ci; } - VkPipelineRasterizationStateCreateInfo rasterization_ci; - rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_ci.pNext = nullptr; - rasterization_ci.flags = 0; - rasterization_ci.depthClampEnable = state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; - rasterization_ci.rasterizerDiscardEnable = state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; - rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_ci.cullMode = - dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE; - rasterization_ci.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()); - rasterization_ci.depthBiasEnable = state.depth_bias_enable; - rasterization_ci.depthBiasConstantFactor = 0.0f; - rasterization_ci.depthBiasClamp = 0.0f; - rasterization_ci.depthBiasSlopeFactor = 0.0f; - rasterization_ci.lineWidth = 1.0f; - - VkPipelineMultisampleStateCreateInfo multisample_ci; - multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_ci.pNext = nullptr; - multisample_ci.flags = 0; - multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisample_ci.sampleShadingEnable = VK_FALSE; - multisample_ci.minSampleShading = 0.0f; - multisample_ci.pSampleMask = nullptr; - multisample_ci.alphaToCoverageEnable = VK_FALSE; - multisample_ci.alphaToOneEnable = VK_FALSE; - - VkPipelineDepthStencilStateCreateInfo depth_stencil_ci; - depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - depth_stencil_ci.pNext = nullptr; - depth_stencil_ci.flags = 0; - depth_stencil_ci.depthTestEnable = dynamic.depth_test_enable; - depth_stencil_ci.depthWriteEnable = dynamic.depth_write_enable; - depth_stencil_ci.depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS; - depth_stencil_ci.depthBoundsTestEnable = dynamic.depth_bounds_enable; - depth_stencil_ci.stencilTestEnable = dynamic.stencil_enable; - depth_stencil_ci.front = GetStencilFaceState(dynamic.front); - depth_stencil_ci.back = GetStencilFaceState(dynamic.back); - depth_stencil_ci.minDepthBounds = 0.0f; - depth_stencil_ci.maxDepthBounds = 0.0f; + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE, + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); for (std::size_t index = 0; index < num_attachments; ++index) { - static constexpr std::array COMPONENT_TABLE = { - VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT}; + static constexpr std::array COMPONENT_TABLE{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; const auto& blend = state.attachments[index]; VkColorComponentFlags color_components = 0; @@ -346,35 +368,36 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa } } - VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index]; - attachment.blendEnable = blend.enable != 0; - attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()); - attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()); - attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()); - attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()); - attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()); - attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()); - attachment.colorWriteMask = color_components; + cb_attachments[index] = { + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = color_components, + }; } - VkPipelineColorBlendStateCreateInfo color_blend_ci; - color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - color_blend_ci.pNext = nullptr; - color_blend_ci.flags = 0; - color_blend_ci.logicOpEnable = VK_FALSE; - color_blend_ci.logicOp = VK_LOGIC_OP_COPY; - color_blend_ci.attachmentCount = static_cast<u32>(num_attachments); - color_blend_ci.pAttachments = cb_attachments.data(); - std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); - - std::vector dynamic_states = { + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast<u32>(num_attachments), + .pAttachments = cb_attachments.data(), + }; + + std::vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended = { + static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, @@ -389,18 +412,19 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } - VkPipelineDynamicStateCreateInfo dynamic_state_ci; - dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_ci.pNext = nullptr; - dynamic_state_ci.flags = 0; - dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); - dynamic_state_ci.pDynamicStates = dynamic_states.data(); + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast<u32>(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; - VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; - subgroup_size_ci.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - subgroup_size_ci.pNext = nullptr; - subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; std::vector<VkPipelineShaderStageCreateInfo> shader_stages; std::size_t module_index = 0; @@ -408,6 +432,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa if (!program[stage]) { continue; } + VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; stage_ci.pNext = nullptr; @@ -422,26 +447,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa } } - VkGraphicsPipelineCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.stageCount = static_cast<u32>(shader_stages.size()); - ci.pStages = shader_stages.data(); - ci.pVertexInputState = &vertex_input_ci; - ci.pInputAssemblyState = &input_assembly_ci; - ci.pTessellationState = &tessellation_ci; - ci.pViewportState = &viewport_ci; - ci.pRasterizationState = &rasterization_ci; - ci.pMultisampleState = &multisample_ci; - ci.pDepthStencilState = &depth_stencil_ci; - ci.pColorBlendState = &color_blend_ci; - ci.pDynamicState = &dynamic_state_ci; - ci.layout = *layout; - ci.renderPass = renderpass; - ci.subpass = 0; - ci.basePipelineHandle = nullptr; - ci.basePipelineIndex = 0; + const VkGraphicsPipelineCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }; return device.GetLogical().CreateGraphicsPipeline(ci); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index a1d699a6c..58aa35efd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -19,7 +19,27 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey; +struct GraphicsPipelineCacheKey { + RenderPassParams renderpass_params; + u32 padding; + std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; + FixedPipelineState fixed_state; + + std::size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + std::size_t Size() const noexcept { + return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); class VKDescriptorPool; class VKDevice; @@ -54,6 +74,10 @@ public: return renderpass; } + GraphicsPipelineCacheKey GetCacheKey() const { + return cache_key; + } + private: vk::DescriptorSetLayout CreateDescriptorSetLayout( vk::Span<VkDescriptorSetLayoutBinding> bindings) const; @@ -70,7 +94,7 @@ private: const VKDevice& device; VKScheduler& scheduler; - const FixedPipelineState fixed_state; + const GraphicsPipelineCacheKey cache_key; const u64 hash; vk::DescriptorSetLayout descriptor_set_layout; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 9bceb3861..1c418ea17 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num void VKImage::CreatePresentView() { // Image type has to be 2D to be presented. - VkImageViewCreateInfo image_view_ci; - image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_ci.pNext = nullptr; - image_view_ci.flags = 0; - image_view_ci.image = *image; - image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_ci.format = format; - image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - image_view_ci.subresourceRange.aspectMask = aspect_mask; - image_view_ci.subresourceRange.baseMipLevel = 0; - image_view_ci.subresourceRange.levelCount = 1; - image_view_ci.subresourceRange.baseArrayLayer = 0; - image_view_ci.subresourceRange.layerCount = 1; - present_view = device.GetLogical().CreateImageView(image_view_ci); + present_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index b4c650a63..24c8960ac 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t }(); // Try to allocate found type. - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = size; - memory_ai.memoryTypeIndex = type; - - vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = size, + .memoryTypeIndex = type, + }); if (!memory) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); return false; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3da835324..5c038f4bc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -28,6 +28,7 @@ #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace Vulkan { @@ -88,12 +89,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi // Combined image samplers can be arrayed. count = container[i].size; } - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = count; - entry.stageFlags = stage_flags; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = count, + .stageFlags = stage_flags, + .pImmutableSamplers = nullptr, + }); } } @@ -133,64 +135,56 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset) - : gpu_addr{gpu_addr}, program_code{std::move(program_code)}, - registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, - compiler_settings, registry}, - entries{GenerateShaderEntries(shader_ir)} {} +Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage, + GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_, + u32 main_offset) + : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine), + shader_ir(program_code, main_offset, compiler_settings, registry), + entries(GenerateShaderEntries(shader_ir)) {} Shader::~Shader() = default; -Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, - Tegra::Engines::ShaderType stage) { - if (stage == ShaderType::Compute) { - return system.GPU().KeplerCompute(); - } else { - return system.GPU().Maxwell3D(); - } -} - -VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache) - : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device}, - scheduler{scheduler}, descriptor_pool{descriptor_pool}, - update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {} +VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKRenderPassCache& renderpass_cache_) + : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {} VKPipelineCache::~VKPipelineCache() = default; std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { - const auto& gpu = system.GPU().Maxwell3D(); - std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto program{static_cast<Maxwell::ShaderProgram>(index)}; // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { continue; } - auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(system, program)}; - const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); if (!result) { - const auto host_ptr{memory_manager.GetPointer(program_addr)}; + const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; // No shader found - create a new one - constexpr u32 stage_offset = STAGE_MAIN_OFFSET; + static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); + ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), - stage_offset); + auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, + std::move(code), stage_offset); result = shader.get(); if (cpu_addr) { @@ -204,24 +198,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { return last_shaders = shaders; } -VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { +VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( + const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { - return *last_graphics_pipeline; + return last_graphics_pipeline; } last_graphics_key = key; + if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { + std::unique_lock lock{pipeline_cache}; + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); + if (is_cache_miss) { + gpu.ShaderNotify().MarkSharderBuilding(); + LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); + const auto [program, bindings] = DecompileShaders(key.fixed_state); + async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, + update_descriptor_queue, renderpass_cache, bindings, + program, key); + } + last_graphics_pipeline = pair->second.get(); + return last_graphics_pipeline; + } + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); auto& entry = pair->second; if (is_cache_miss) { + gpu.ShaderNotify().MarkSharderBuilding(); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key); + const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache, key, bindings, program); + gpu.ShaderNotify().MarkShaderComplete(); } - return *(last_graphics_pipeline = entry.get()); + last_graphics_pipeline = entry.get(); + return last_graphics_pipeline; } VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { @@ -234,22 +247,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - auto& memory_manager = system.GPU().MemoryManager(); - const auto program_addr = key.shader; + const GPUVAddr gpu_addr = key.shader; - const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); if (!shader) { // No shader found - create a new one - const auto host_ptr = memory_manager.GetPointer(program_addr); + const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); + ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, - std::move(code), KERNEL_MAIN_OFFSET); + auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, + *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); shader = shader_info.get(); if (cpu_addr) { @@ -259,10 +271,15 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } } - Specialization specialization; - specialization.workgroup_size = key.workgroup_size; - specialization.shared_memory_size = key.shared_memory_size; - + const Specialization specialization{ + .base_binding = 0, + .workgroup_size = key.workgroup_size, + .shared_memory_size = key.shared_memory_size, + .point_size = std::nullopt, + .enabled_attributes = {}, + .attribute_types = {}, + .ndc_minus_one_to_one = false, + }; const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, shader->GetRegistry(), specialization), shader->GetEntries()}; @@ -271,6 +288,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } +void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { + gpu.ShaderNotify().MarkShaderComplete(); + std::unique_lock lock{pipeline_cache}; + graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); +} + void VKPipelineCache::OnShaderRemoval(Shader* shader) { bool finished = false; const auto Finish = [&] { @@ -306,11 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { } std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> -VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { - const auto& fixed_state = key.fixed_state; - auto& memory_manager = system.GPU().MemoryManager(); - const auto& gpu = system.GPU().Maxwell3D(); - +VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { Specialization specialization; if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points || device.IsExtExtendedDynamicStateSupported()) { @@ -333,12 +352,12 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { continue; } - const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 @@ -370,13 +389,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { for (u32 i = 0; i < count; ++i) { const u32 num_samplers = container[i].size; - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = num_samplers; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = num_samplers, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); ++binding; offset += num_samplers * entry_size; @@ -389,22 +409,24 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. // Note: Fixed in driver Windows 443.24, Linux 440.66.15 for (u32 i = 0; i < count; ++i) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding + i; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = descriptor_type; - entry.offset = static_cast<std::size_t>(offset + i * entry_size); - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding + i, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = descriptor_type, + .offset = static_cast<std::size_t>(offset + i * entry_size), + .stride = entry_size, + }); } } else if (count > 0) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = count; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = count, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); } offset += count * entry_size; binding += count; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0a3fe65fb..1a31fd9f6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -22,6 +22,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey { - RenderPassParams renderpass_params; - u32 padding; - std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); - struct ComputePipelineCacheKey { GPUVAddr shader; u32 shared_memory_size; @@ -106,7 +85,8 @@ namespace Vulkan { class Shader { public: - explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, + explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine, + Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, u32 main_offset); ~Shader(); @@ -118,22 +98,19 @@ public: return shader_ir; } - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - const VideoCommon::Shader::ShaderIR& GetIR() const { return shader_ir; } + const VideoCommon::Shader::Registry& GetRegistry() const { + return registry; + } + const ShaderEntries& GetEntries() const { return entries; } private: - static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system, - Tegra::Engines::ShaderType stage); - GPUVAddr gpu_addr{}; VideoCommon::Shader::ProgramCode program_code; VideoCommon::Shader::Registry registry; @@ -143,27 +120,36 @@ private: class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { public: - explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, + explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const VKDevice& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache); ~VKPipelineCache() override; std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); - VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); + VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); + protected: void OnShaderRemoval(Shader* shader) final; private: std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( - const GraphicsPipelineCacheKey& key); + const FixedPipelineState& fixed_state); + + Tegra::GPU& gpu; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; - Core::System& system; const VKDevice& device; VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; @@ -178,6 +164,7 @@ private: GraphicsPipelineCacheKey last_graphics_key; VKGraphicsPipeline* last_graphics_pipeline = nullptr; + std::mutex pipeline_cache; std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> graphics_cache; std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index bc91c48cc..5a97c959d 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -47,14 +47,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - VkQueryPoolCreateInfo query_pool_ci; - query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - query_pool_ci.pNext = nullptr; - query_pool_ci.flags = 0; - query_pool_ci.queryType = GetTarget(type); - query_pool_ci.queryCount = static_cast<u32>(end - begin); - query_pool_ci.pipelineStatistics = 0; - pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); + pools.push_back(device->GetLogical().CreateQueryPool({ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queryType = GetTarget(type), + .queryCount = static_cast<u32>(end - begin), + .pipelineStatistics = 0, + })); } void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { @@ -68,10 +68,11 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; } -VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, +VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKScheduler& scheduler) : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, - QueryPool>{system, rasterizer}, + QueryPool>{rasterizer, maxwell3d, gpu_memory}, device{device}, scheduler{scheduler} { for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 40119e6d3..9be996e55 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -56,7 +56,8 @@ class VKQueryCache final : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, QueryPool> { public: - explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKScheduler& scheduler); ~VKQueryCache(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7625871c2..bafebe294 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -14,6 +14,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "core/core.h" #include "core/settings.h" #include "video_core/engines/kepler_compute.h" @@ -64,20 +65,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - VkViewport viewport; - viewport.x = src.translate_x - src.scale_x; - viewport.y = src.translate_y - src.scale_y; - viewport.width = width != 0.0f ? width : 1.0f; - viewport.height = height != 0.0f ? height : 1.0f; + VkViewport viewport{ + .x = src.translate_x - src.scale_x, + .y = src.translate_y - src.scale_y, + .width = width != 0.0f ? width : 1.0f, + .height = height != 0.0f ? height : 1.0f, + .minDepth = src.translate_z - src.scale_z * reduce_z, + .maxDepth = src.translate_z + src.scale_z, + }; - const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - viewport.minDepth = src.translate_z - src.scale_z * reduce_z; - viewport.maxDepth = src.translate_z + src.scale_z; if (!device.IsExtDepthRangeUnrestrictedSupported()) { viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } + return viewport; } @@ -378,28 +381,34 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { } } -RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, - VKScreenInfo& screen_info, const VKDevice& device, - VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, StateTracker& state_tracker, - VKScheduler& scheduler) - : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, - screen_info{screen_info}, device{device}, resource_manager{resource_manager}, - memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, +RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, + Tegra::MemoryManager& gpu_memory_, + Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_, + const VKDevice& device_, VKResourceManager& resource_manager_, + VKMemoryManager& memory_manager_, StateTracker& state_tracker_, + VKScheduler& scheduler_) + : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_), + maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), + device(device_), resource_manager(resource_manager_), memory_manager(memory_manager_), + state_tracker(state_tracker_), scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), descriptor_pool(device), update_descriptor_queue(device, scheduler), renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, - staging_pool), - pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, - renderpass_cache), - buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device), - fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), - query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { + texture_cache(*this, maxwell3d, gpu_memory, device, resource_manager, memory_manager, + scheduler, staging_pool), + pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + descriptor_pool, update_descriptor_queue, renderpass_cache), + buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool), + sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), + fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, + scheduler), + wfi_event(device.GetLogical().CreateNewEvent()), async_shaders(emu_window) { scheduler.SetQueryCache(query_cache); + if (device.UseAsynchronousShaders()) { + async_shaders.AllocateWorkers(); + } } RasterizerVulkan::~RasterizerVulkan() = default; @@ -407,13 +416,13 @@ RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Drawing); + SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); query_cache.UpdateCounters(); - const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key; - key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); + key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); @@ -437,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { key.renderpass_params = GetRenderPassParams(texceptions); key.padding = 0; - auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); - scheduler.BindGraphicsPipeline(pipeline.GetHandle()); + auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { + // Async graphics pipeline was not ready. + return; + } + + scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - const auto renderpass = pipeline.GetRenderPass(); + const auto renderpass = pipeline->GetRenderPass(); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); scheduler.RequestRenderpass(renderpass, framebuffer, render_area); @@ -450,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - const auto pipeline_layout = pipeline.GetLayout(); - const auto descriptor_set = pipeline.CommitDescriptorSet(); + const auto pipeline_layout = pipeline->GetLayout(); + const auto descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { if (descriptor_set) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, @@ -461,15 +475,12 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { }); EndTransformFeedback(); - - system.GPU().TickWork(); } void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); - const auto& gpu = system.GPU().Maxwell3D(); - if (!system.GPU().Maxwell3D().ShouldExecute()) { + if (!maxwell3d.ShouldExecute()) { return; } @@ -478,7 +489,7 @@ void RasterizerVulkan::Clear() { query_cache.UpdateCounters(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; const bool use_depth = regs.clear_buffers.Z; @@ -508,10 +519,11 @@ void RasterizerVulkan::Clear() { const u32 color_attachment = regs.clear_buffers.RT; scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { - VkClearAttachment attachment; - attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - attachment.colorAttachment = color_attachment; - attachment.clearValue = clear_value; + const VkClearAttachment attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = color_attachment, + .clearValue = clear_value, + }; cmdbuf.ClearAttachments(attachment, clear_rect); }); } @@ -529,10 +541,6 @@ void RasterizerVulkan::Clear() { scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { - VkClearValue clear_value; - clear_value.depthStencil.depth = clear_depth; - clear_value.depthStencil.stencil = clear_stencil; - VkClearAttachment attachment; attachment.aspectMask = aspect_flags; attachment.colorAttachment = 0; @@ -550,14 +558,17 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { query_cache.UpdateCounters(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - ComputePipelineCacheKey key; - key.shader = code_addr; - key.shared_memory_size = launch_desc.shared_alloc; - key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z}; - - auto& pipeline = pipeline_cache.GetComputePipeline(key); + const auto& launch_desc = kepler_compute.launch_description; + auto& pipeline = pipeline_cache.GetComputePipeline({ + .shader = code_addr, + .shared_memory_size = launch_desc.shared_alloc, + .workgroup_size = + { + launch_desc.block_dim_x, + launch_desc.block_dim_y, + launch_desc.block_dim_z, + }, + }); // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); @@ -643,16 +654,14 @@ void RasterizerVulkan::SyncGuestHost() { } void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { - gpu.MemoryManager().Write<u32>(addr, value); + gpu_memory.Write<u32>(addr, value); return; } fence_manager.SignalSemaphore(addr, value); } void RasterizerVulkan::SignalSyncPoint(u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { gpu.IncrementSyncPoint(value); return; @@ -661,7 +670,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { } void RasterizerVulkan::ReleaseFences() { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { return; } @@ -739,10 +747,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerVulkan::SetupDirtyFlags() { - state_tracker.Initialize(); -} - void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; @@ -766,10 +770,9 @@ void RasterizerVulkan::FlushWork() { RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { MICROPROFILE_SCOPE(Vulkan_RenderTargets); - auto& maxwell3d = system.GPU().Maxwell3D(); - auto& dirty = maxwell3d.dirty.flags; - auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d.regs; + auto& dirty = maxwell3d.dirty.flags; const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; dirty[VideoCommon::Dirty::RenderTargets] = false; @@ -813,8 +816,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( VkRenderPass renderpass) { - FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), - std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; + FramebufferCacheKey key{ + .renderpass = renderpass, + .width = std::numeric_limits<u32>::max(), + .height = std::numeric_limits<u32>::max(), + .layers = std::numeric_limits<u32>::max(), + .views = {}, + }; const auto try_push = [&key](const View& view) { if (!view) { @@ -827,7 +835,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( return true; }; - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); for (std::size_t index = 0; index < num_attachments; ++index) { if (try_push(color_attachments[index])) { @@ -841,17 +849,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - VkFramebufferCreateInfo framebuffer_ci; - framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_ci.pNext = nullptr; - framebuffer_ci.flags = 0; - framebuffer_ci.renderPass = key.renderpass; - framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); - framebuffer_ci.pAttachments = key.views.data(); - framebuffer_ci.width = key.width; - framebuffer_ci.height = key.height; - framebuffer_ci.layers = key.layers; - framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); + framebuffer = device.GetLogical().CreateFramebuffer({ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = key.renderpass, + .attachmentCount = static_cast<u32>(key.views.size()), + .pAttachments = key.views.data(), + .width = key.width, + .height = key.height, + .layers = key.layers, + }); } return {*framebuffer, VkExtent2D{key.width, key.height}}; @@ -863,13 +871,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Geometry); - const auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; SetupVertexArrays(buffer_bindings); const u32 base_instance = regs.vb_base_instance; - const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; + const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; @@ -930,7 +937,7 @@ void RasterizerVulkan::SetupImageTransitions( } void RasterizerVulkan::UpdateDynamicStates() { - auto& regs = system.GPU().Maxwell3D().regs; + auto& regs = maxwell3d.regs; UpdateViewportsState(regs); UpdateScissorsState(regs); UpdateDepthBias(regs); @@ -951,7 +958,7 @@ void RasterizerVulkan::UpdateDynamicStates() { } void RasterizerVulkan::BeginTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -983,7 +990,7 @@ void RasterizerVulkan::BeginTransformFeedback() { } void RasterizerVulkan::EndTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -996,7 +1003,7 @@ void RasterizerVulkan::EndTransformFeedback() { } void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; @@ -1022,7 +1029,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar if (params.num_vertices == 0) { return; } - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; switch (regs.draw.topology) { case Maxwell::PrimitiveTopology::Quads: { if (!params.is_indexed) { @@ -1070,8 +1077,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& gpu = system.GPU().Maxwell3D(); - const auto& shader_stage = gpu.state.shader_stages[stage]; + const auto& shader_stage = maxwell3d.state.shader_stages[stage]; for (const auto& entry : entries.const_buffers) { SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); } @@ -1079,8 +1085,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - auto& gpu{system.GPU()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]}; + const auto& cbufs{maxwell3d.state.shader_stages[stage]}; for (const auto& entry : entries.global_buffers) { const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); @@ -1090,19 +1095,17 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(gpu, entry, stage).tic; + const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; SetupUniformTexels(image, entry); } } void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(gpu, entry, stage, i); + const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); SetupTexture(texture, entry); } } @@ -1110,25 +1113,23 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(gpu, entry, stage).tic; + const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; SetupStorageTexel(image, entry); } } void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Images); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(gpu, entry, stage).tic; + const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; SetupImage(tic, entry); } } void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; for (const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); @@ -1142,7 +1143,7 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; for (const auto& entry : entries.global_buffers) { const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; SetupGlobalBuffer(entry, addr); @@ -1151,19 +1152,17 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupUniformTexels(image, entry); } } void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); + const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); SetupTexture(texture, entry); } } @@ -1171,18 +1170,16 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupStorageTexel(image, entry); } } void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Images); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupImage(tic, entry); } } @@ -1206,9 +1203,8 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, } void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto actual_addr = memory_manager.Read<u64>(address); - const auto size = memory_manager.Read<u32>(address + 8); + const u64 actual_addr = gpu_memory.Read<u64>(address); + const u32 size = gpu_memory.Read<u32>(address + 8); if (size == 0) { // Sometimes global memory pointers don't have a proper size. Upload a dummy entry @@ -1426,10 +1422,10 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { } void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchPrimitiveTopology()) { + const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); + if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) { return; } - const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); }); @@ -1491,7 +1487,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { } std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; std::size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { @@ -1506,9 +1502,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { } std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; - return static_cast<std::size_t>(regs.index_array.count) * - static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); + return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * + static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); } std::size_t RasterizerVulkan::CalculateConstBufferSize( @@ -1523,7 +1518,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( } RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); RenderPassParams params; @@ -1553,17 +1548,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() { return *default_buffer; } - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = DEFAULT_BUFFER_SIZE; - ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - default_buffer = device.GetLogical().CreateBuffer(ci); + default_buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = DEFAULT_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); default_buffer_commit = memory_manager.Commit(default_buffer, false); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 923178b0b..16251d0f6 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -32,6 +32,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" namespace Core { class System; @@ -105,7 +106,8 @@ struct ImageView { class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, + explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, StateTracker& state_tracker, VKScheduler& scheduler); @@ -134,7 +136,14 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - void SetupDirtyFlags() override; + + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } /// Maximum supported size that a constbuffer can have in bytes. static constexpr std::size_t MaxConstbufferSize = 0x10000; @@ -270,8 +279,11 @@ private: VkBuffer DefaultBuffer(); - Core::System& system; - Core::Frontend::EmuWindow& render_window; + Tegra::GPU& gpu; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + VKScreenInfo& screen_info; const VKDevice& device; VKResourceManager& resource_manager; @@ -291,12 +303,13 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; - VKFenceManager fence_manager; VKQueryCache query_cache; + VKFenceManager fence_manager; vk::Buffer default_buffer; VKMemoryCommit default_buffer_commit; vk::Event wfi_event; + VideoCommon::Shader::AsyncShaders async_shaders; std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 3f71d005e..80284cf92 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { using namespace VideoCore::Surface; + const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); + std::vector<VkAttachmentDescription> descriptors; + descriptors.reserve(num_attachments); + std::vector<VkAttachmentReference> color_references; + color_references.reserve(num_attachments); - const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); for (std::size_t rt = 0; rt < num_attachments; ++rt) { const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); @@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - descriptor.initialLayout = color_layout; - descriptor.finalLayout = color_layout; - - VkAttachmentReference& reference = color_references.emplace_back(); - reference.attachment = static_cast<u32>(rt); - reference.layout = color_layout; + descriptors.push_back({ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = color_layout, + .finalLayout = color_layout, + }); + + color_references.push_back({ + .attachment = static_cast<u32>(rt), + .layout = color_layout, + }); } VkAttachmentReference zeta_attachment_ref; @@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout zeta_layout = params.zeta_texception != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = 0; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.initialLayout = zeta_layout; - descriptor.finalLayout = zeta_layout; - - zeta_attachment_ref.attachment = static_cast<u32>(num_attachments); - zeta_attachment_ref.layout = zeta_layout; + descriptors.push_back({ + .flags = 0, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = zeta_layout, + .finalLayout = zeta_layout, + }); + + zeta_attachment_ref = { + .attachment = static_cast<u32>(num_attachments), + .layout = zeta_layout, + }; } - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); - subpass_description.pColorAttachments = color_references.data(); - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast<u32>(color_references.size()), + .pColorAttachments = color_references.data(), + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; VkAccessFlags access = 0; VkPipelineStageFlags stage = 0; @@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } - VkSubpassDependency subpass_dependency; - subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - subpass_dependency.dstSubpass = 0; - subpass_dependency.srcStageMask = stage; - subpass_dependency.dstStageMask = stage; - subpass_dependency.srcAccessMask = 0; - subpass_dependency.dstAccessMask = access; - subpass_dependency.dependencyFlags = 0; - - VkRenderPassCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.attachmentCount = static_cast<u32>(descriptors.size()); - ci.pAttachments = descriptors.data(); - ci.subpassCount = 1; - ci.pSubpasses = &subpass_description; - ci.dependencyCount = 1; - ci.pDependencies = &subpass_dependency; - return device.GetLogical().CreateRenderPass(ci); + const VkSubpassDependency subpass_dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = stage, + .dstStageMask = stage, + .srcAccessMask = 0, + .dstAccessMask = access, + .dependencyFlags = 0, + }; + + return device.GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptors.size()), + .pAttachments = descriptors.data(), + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &subpass_dependency, + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index dc06f545a..f19330a36 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -18,33 +18,32 @@ namespace { constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; constexpr std::size_t FENCES_GROW_STEP = 0x40; -VkFenceCreateInfo BuildFenceCreateInfo() { - VkFenceCreateInfo fence_ci; - fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_ci.pNext = nullptr; - fence_ci.flags = 0; - return fence_ci; +constexpr VkFenceCreateInfo BuildFenceCreateInfo() { + return { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; } } // Anonymous namespace class CommandBufferPool final : public VKFencedPool { public: - CommandBufferPool(const VKDevice& device) + explicit CommandBufferPool(const VKDevice& device) : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} void Allocate(std::size_t begin, std::size_t end) override { // Command buffers are going to be commited, recorded, executed every single usage cycle. // They are also going to be reseted when commited. - VkCommandPoolCreateInfo command_pool_ci; - command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - command_pool_ci.pNext = nullptr; - command_pool_ci.flags = - VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); - Pool& pool = pools.emplace_back(); - pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); + pool.handle = device.GetLogical().CreateCommandPool({ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = device.GetGraphicsFamily(), + }); pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 616eacc36..b068888f9 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -44,32 +44,36 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); const std::array color = tsc.GetBorderColor(); - VkSamplerCustomBorderColorCreateInfoEXT border; - border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; - border.pNext = nullptr; - border.format = VK_FORMAT_UNDEFINED; + VkSamplerCustomBorderColorCreateInfoEXT border{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .customBorderColor = {}, + .format = VK_FORMAT_UNDEFINED, + }; std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - VkSamplerCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - ci.pNext = arbitrary_borders ? &border : nullptr; - ci.flags = 0; - ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); - ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter); - ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); - ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); - ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); - ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); - ci.mipLodBias = tsc.GetLodBias(); - ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; - ci.maxAnisotropy = tsc.GetMaxAnisotropy(); - ci.compareEnable = tsc.depth_compare_enabled; - ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); - ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(); - ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(); - ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); - ci.unnormalizedCoordinates = VK_FALSE; - return device.GetLogical().CreateSampler(ci); + return device.GetLogical().CreateSampler({ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = arbitrary_borders ? &border : nullptr, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.GetLodBias(), + .anisotropyEnable = + static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = tsc.GetMaxAnisotropy(), + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); } VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 56524e6f3..dbbd0961a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -100,16 +100,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame state.framebuffer = framebuffer; state.render_area = render_area; - VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = render_area; - renderpass_bi.clearValueCount = 0; - renderpass_bi.pClearValues = nullptr; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { if (end_renderpass) { @@ -157,16 +160,17 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { current_cmdbuf.End(); - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.pWaitDstStageMask = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = current_cmdbuf.address(); - submit_info.signalSemaphoreCount = semaphore ? 1 : 0; - submit_info.pSignalSemaphores = &semaphore; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = current_cmdbuf.address(), + .signalSemaphoreCount = semaphore ? 1U : 0U, + .pSignalSemaphores = &semaphore, + }; switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { case VK_SUCCESS: break; @@ -181,19 +185,18 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { void VKScheduler::AllocateNewContext() { ++ticks; - VkCommandBufferBeginInfo cmdbuf_bi; - cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmdbuf_bi.pNext = nullptr; - cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmdbuf_bi.pInheritanceInfo = nullptr; - std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), device.GetDispatchLoader()); - current_cmdbuf.Begin(cmdbuf_bi); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97429cc59..cd7d7a4e4 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -685,13 +685,19 @@ private: } t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - const u32 smem_size = specialization.shared_memory_size; + u32 smem_size = specialization.shared_memory_size * 4; if (smem_size == 0) { // Avoid declaring an empty array. return; } - const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); - const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); + const u32 limit = device.GetMaxComputeSharedMemorySize(); + if (smem_size > limit) { + LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", + smem_size, limit); + smem_size = limit; + } + + const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); Name(type_pointer, "SharedMemory"); @@ -700,9 +706,9 @@ private: } void DeclareInternalFlags() { - constexpr std::array names = {"zero", "sign", "carry", "overflow"}; + static constexpr std::array names{"zero", "sign", "carry", "overflow"}; + for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -2798,7 +2804,6 @@ private: std::map<GlobalMemoryBase, Id> global_buffers; std::map<u32, TexelBuffer> uniform_texels; std::map<u32, SampledImage> sampled_images; - std::map<u32, TexelBuffer> storage_texels; std::map<u32, StorageImage> images; std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 112df9c71..c1a218d76 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); std::memcpy(data.get(), code_data, code_size); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code_size; - ci.pCode = data.get(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = data.get(), + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 45c180221..5eca0ab91 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -71,20 +71,19 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { const u32 log2 = Common::Log2Ceil64(size); - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = 1ULL << log2; - ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = device.GetLogical().CreateBuffer(ci); + buffer->handle = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 1ULL << log2, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); buffer->commit = memory_manager.Commit(buffer->handle, host_visible); auto& entries = GetCache(host_visible)[log2].entries; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index e5a583dd5..5d2c4a796 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() { flags[DepthWriteEnable] = true; flags[DepthCompareOp] = true; flags[FrontFace] = true; - flags[PrimitiveTopology] = true; flags[StencilOp] = true; flags[StencilTestEnable] = true; return flags; @@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) { table[OFF(screen_y_control)] = FrontFace; } -void SetupDirtyPrimitiveTopology(Tables& tables) { - tables[0][OFF(draw.topology)] = PrimitiveTopology; -} - void SetupDirtyStencilOp(Tables& tables) { auto& table = tables[0]; table[OFF(stencil_front_op_fail)] = StencilOp; @@ -137,12 +132,9 @@ void SetupDirtyStencilTestEnable(Tables& tables) { } // Anonymous namespace -StateTracker::StateTracker(Core::System& system) - : system{system}, invalidation_flags{MakeInvalidationFlags()} {} - -void StateTracker::Initialize() { - auto& dirty = system.GPU().Maxwell3D().dirty; - auto& tables = dirty.tables; +StateTracker::StateTracker(Tegra::GPU& gpu) + : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { + auto& tables = gpu.Maxwell3D().dirty.tables; SetupDirtyRenderTargets(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -156,12 +148,8 @@ void StateTracker::Initialize() { SetupDirtyDepthWriteEnable(tables); SetupDirtyDepthCompareOp(tables); SetupDirtyFrontFace(tables); - SetupDirtyPrimitiveTopology(tables); SetupDirtyStencilOp(tables); -} - -void StateTracker::InvalidateCommandBufferState() { - system.GPU().Maxwell3D().dirty.flags |= invalidation_flags; + SetupDirtyStencilTestEnable(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 54ca0d6c6..1de789e57 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -32,7 +32,6 @@ enum : u8 { DepthWriteEnable, DepthCompareOp, FrontFace, - PrimitiveTopology, StencilOp, StencilTestEnable, @@ -43,12 +42,15 @@ static_assert(Last <= std::numeric_limits<u8>::max()); } // namespace Dirty class StateTracker { -public: - explicit StateTracker(Core::System& system); + using Maxwell = Tegra::Engines::Maxwell3D::Regs; - void Initialize(); +public: + explicit StateTracker(Tegra::GPU& gpu); - void InvalidateCommandBufferState(); + void InvalidateCommandBufferState() { + flags |= invalidation_flags; + current_topology = INVALID_TOPOLOGY; + } bool TouchViewports() { return Exchange(Dirty::Viewports, false); @@ -102,10 +104,6 @@ public: return Exchange(Dirty::FrontFace, false); } - bool TouchPrimitiveTopology() { - return Exchange(Dirty::PrimitiveTopology, false); - } - bool TouchStencilOp() { return Exchange(Dirty::StencilOp, false); } @@ -114,16 +112,24 @@ public: return Exchange(Dirty::StencilTestEnable, false); } + bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) { + const bool has_changed = current_topology != new_topology; + current_topology = new_topology; + return has_changed; + } + private: + static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); + bool Exchange(std::size_t id, bool new_value) const noexcept { - auto& flags = system.GPU().Maxwell3D().dirty.flags; const bool is_dirty = flags[id]; flags[id] = new_value; return is_dirty; } - Core::System& system; + Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; + Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 2d28a6c47..3c9171a5e 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -57,9 +57,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, +VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, VkBufferUsageFlags usage) - : device{device}, scheduler{scheduler} { + : device{device_}, scheduler{scheduler_} { CreateBuffers(usage); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); @@ -122,30 +122,27 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; - - VkBufferCreateInfo buffer_ci; - buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_ci.pNext = nullptr; - buffer_ci.flags = 0; - buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); - buffer_ci.usage = usage; - buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_ci.queueFamilyIndexCount = 0; - buffer_ci.pQueueFamilyIndices = nullptr; - - buffer = device.GetLogical().CreateBuffer(buffer_ci); + buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); const u32 required_flags = requirements.memoryTypeBits; stream_buffer_size = static_cast<u64>(requirements.size); - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = requirements.size; - memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); - - memory = device.GetLogical().AllocateMemory(memory_ai); + memory = device.GetLogical().AllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = GetMemoryType(memory_properties, required_flags), + }); buffer.BindMemory(*memory, 0); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index bffd8f32a..6bfd2abae 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -95,15 +95,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { const auto present_queue{device.GetPresentQueue()}; bool recreated = false; - VkPresentInfoKHR present_info; - present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.pNext = nullptr; - present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U; - present_info.pWaitSemaphores = semaphores.data(); - present_info.swapchainCount = 1; - present_info.pSwapchains = swapchain.address(); - present_info.pImageIndices = &image_index; - present_info.pResults = nullptr; + const VkPresentInfoKHR present_info{ + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreCount = render_semaphore ? 2U : 1U, + .pWaitSemaphores = semaphores.data(), + .swapchainCount = 1, + .pSwapchains = swapchain.address(), + .pImageIndices = &image_index, + .pResults = nullptr, + }; switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: @@ -147,24 +148,26 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci; - swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - swapchain_ci.pNext = nullptr; - swapchain_ci.flags = 0; - swapchain_ci.surface = surface; - swapchain_ci.minImageCount = requested_image_count; - swapchain_ci.imageFormat = surface_format.format; - swapchain_ci.imageColorSpace = surface_format.colorSpace; - swapchain_ci.imageArrayLayers = 1; - swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - swapchain_ci.queueFamilyIndexCount = 0; - swapchain_ci.pQueueFamilyIndices = nullptr; - swapchain_ci.preTransform = capabilities.currentTransform; - swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - swapchain_ci.presentMode = present_mode; - swapchain_ci.clipped = VK_FALSE; - swapchain_ci.oldSwapchain = nullptr; + VkSwapchainCreateInfoKHR swapchain_ci{ + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .surface = surface, + .minImageCount = requested_image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = {}, + .imageArrayLayers = 1, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .preTransform = capabilities.currentTransform, + .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + .presentMode = present_mode, + .clipped = VK_FALSE, + .oldSwapchain = nullptr, + }; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; @@ -173,8 +176,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); - } else { - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } // Request the size again to reduce the possibility of a TOCTOU race condition. @@ -200,20 +201,29 @@ void VKSwapchain::CreateSemaphores() { } void VKSwapchain::CreateImageViews() { - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - // ci.image - ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - ci.format = image_format; - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - ci.subresourceRange.baseMipLevel = 0; - ci.subresourceRange.levelCount = 1; - ci.subresourceRange.baseArrayLayer = 0; - ci.subresourceRange.layerCount = 1; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = {}, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; image_views.resize(image_count); for (std::size_t i = 0; i < image_count; i++) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 430031665..06182d909 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) { vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(host_memory_size); - ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - return device.GetLogical().CreateBuffer(ci); + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(host_memory_size), + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); } VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, @@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, std::size_t host_memory_size) { ASSERT(params.IsBuffer()); - VkBufferViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.buffer = buffer; - ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - ci.offset = 0; - ci.range = static_cast<VkDeviceSize>(host_memory_size); - return ci; + return { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = buffer, + .format = + MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, + .offset = 0, + .range = static_cast<VkDeviceSize>(host_memory_size), + }; } VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { @@ -130,23 +132,24 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP const auto [format, attachable, storage] = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - VkImageCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.imageType = SurfaceTargetToImage(params.target); - ci.format = format; - ci.mipLevels = params.num_levels; - ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); - ci.samples = VK_SAMPLE_COUNT_1_BIT; - ci.tiling = VK_IMAGE_TILING_OPTIMAL; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + VkImageCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = SurfaceTargetToImage(params.target), + .format = format, + .extent = {}, + .mipLevels = params.num_levels, + .arrayLayers = static_cast<u32>(params.GetNumLayers()), + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; if (attachable) { ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -185,13 +188,13 @@ u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::Swizzl } // Anonymous namespace -CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool, - GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system}, - device{device}, resource_manager{resource_manager}, - memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { +CachedSurface::CachedSurface(const VKDevice& device, VKResourceManager& resource_manager, + VKMemoryManager& memory_manager, VKScheduler& scheduler, + VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, + const SurfaceParams& params) + : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device}, + resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, + staging_pool{staging_pool} { if (params.IsBuffer()) { buffer = CreateBuffer(device, params, host_memory_size); commit = memory_manager.Commit(buffer, false); @@ -233,7 +236,7 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { UNIMPLEMENTED_IF(params.IsBuffer()); - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); } @@ -281,12 +284,10 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.pNext = nullptr; - barrier.srcAccessMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier.dstAccessMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; - barrier.srcQueueFamilyIndex = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstQueueFamilyIndex = VK_ACCESS_SHADER_READ_BIT; - barrier.srcQueueFamilyIndex = 0; - barrier.dstQueueFamilyIndex = 0; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.buffer = dst_buffer; barrier.offset = 0; barrier.size = size; @@ -323,22 +324,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { } VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - VkBufferImageCopy copy; - copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); - copy.bufferRowLength = 0; - copy.bufferImageHeight = 0; - copy.imageSubresource.aspectMask = image->GetAspectMask(); - copy.imageSubresource.mipLevel = level; - copy.imageSubresource.baseArrayLayer = 0; - copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); - copy.imageOffset.x = 0; - copy.imageOffset.y = 0; - copy.imageOffset.z = 0; - copy.imageExtent.width = params.GetMipWidth(level); - copy.imageExtent.height = params.GetMipHeight(level); - copy.imageExtent.depth = - params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - return copy; + return { + .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = image->GetAspectMask(), + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = static_cast<u32>(params.GetNumLayers()), + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = params.GetMipWidth(level), + .height = params.GetMipHeight(level), + .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, + }, + }; } VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { @@ -382,7 +386,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. std::swap(swizzle[0], swizzle[2]); } @@ -394,11 +398,11 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); const bool is_first = x_source == SwizzleSource::R; switch (params.pixel_format) { - case VideoCore::Surface::PixelFormat::Z24S8: - case VideoCore::Surface::PixelFormat::Z32FS8: + case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT: + case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT: aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; break; - case VideoCore::Surface::PixelFormat::S8Z24: + case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM: aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; break; default: @@ -418,20 +422,29 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc ASSERT(num_slices == params.depth); } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.viewType = image_view_type; - ci.format = surface.GetImage().GetFormat(); - ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; - ci.subresourceRange.aspectMask = aspect; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; - ci.subresourceRange.baseArrayLayer = base_layer; - ci.subresourceRange.layerCount = num_layers; - image_view = device.GetLogical().CreateImageView(ci); + image_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .viewType = image_view_type, + .format = surface.GetImage().GetFormat(), + .components = + { + .r = swizzle[0], + .g = swizzle[1], + .b = swizzle[2], + .a = swizzle[3], + }, + .subresourceRange = + { + .aspectMask = aspect, + .baseMipLevel = base_level, + .levelCount = num_levels, + .baseArrayLayer = base_layer, + .layerCount = num_layers, + }, + }); return last_image_view = *image_view; } @@ -441,17 +454,29 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.format = surface.GetImage().GetFormat(); - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = aspect_mask; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .viewType = VK_IMAGE_VIEW_TYPE_1D, + .format = surface.GetImage().GetFormat(), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = base_level, + .levelCount = num_levels, + .baseArrayLayer = 0, + .layerCount = 0, + }, + }; if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; ci.subresourceRange.baseArrayLayer = base_slice; @@ -465,19 +490,21 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } -VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool) - : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, - staging_pool{staging_pool} {} +VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, const VKDevice& device_, + VKResourceManager& resource_manager_, + VKMemoryManager& memory_manager_, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_) + : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()), + device{device_}, resource_manager{resource_manager_}, + memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} {} VKTextureCache::~VKTextureCache() = default; Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager, - scheduler, staging_pool, gpu_addr, params); + return std::make_shared<CachedSurface>(device, resource_manager, memory_manager, scheduler, + staging_pool, gpu_addr, params); } void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, @@ -504,24 +531,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageCopy copy; - copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); - copy.srcSubresource.mipLevel = copy_params.source_level; - copy.srcSubresource.baseArrayLayer = copy_params.source_z; - copy.srcSubresource.layerCount = num_layers; - copy.srcOffset.x = copy_params.source_x; - copy.srcOffset.y = copy_params.source_y; - copy.srcOffset.z = 0; - copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); - copy.dstSubresource.mipLevel = copy_params.dest_level; - copy.dstSubresource.baseArrayLayer = dst_base_layer; - copy.dstSubresource.layerCount = num_layers; - copy.dstOffset.x = copy_params.dest_x; - copy.dstOffset.y = copy_params.dest_y; - copy.dstOffset.z = dst_offset_z; - copy.extent.width = copy_params.width; - copy.extent.height = copy_params.height; - copy.extent.depth = extent_z; + const VkImageCopy copy{ + .srcSubresource = + { + .aspectMask = src_surface->GetAspectMask(), + .mipLevel = copy_params.source_level, + .baseArrayLayer = copy_params.source_z, + .layerCount = num_layers, + }, + .srcOffset = + { + .x = static_cast<s32>(copy_params.source_x), + .y = static_cast<s32>(copy_params.source_y), + .z = 0, + }, + .dstSubresource = + { + .aspectMask = dst_surface->GetAspectMask(), + .mipLevel = copy_params.dest_level, + .baseArrayLayer = dst_base_layer, + .layerCount = num_layers, + }, + .dstOffset = + { + .x = static_cast<s32>(copy_params.dest_x), + .y = static_cast<s32>(copy_params.dest_y), + .z = static_cast<s32>(dst_offset_z), + }, + .extent = + { + .width = copy_params.width, + .height = copy_params.height, + .depth = extent_z, + }, + }; const VkImage src_image = src_surface->GetImageHandle(); const VkImage dst_image = dst_surface->GetImageHandle(); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 807e26c8a..e47d02c41 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -15,10 +15,6 @@ #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" -namespace Core { -class System; -} - namespace VideoCore { class RasterizerInterface; } @@ -45,10 +41,10 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> { friend CachedSurfaceView; public: - explicit CachedSurface(Core::System& system, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool, - GPUVAddr gpu_addr, const SurfaceParams& params); + explicit CachedSurface(const VKDevice& device, VKResourceManager& resource_manager, + VKMemoryManager& memory_manager, VKScheduler& scheduler, + VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, + const SurfaceParams& params); ~CachedSurface(); void UploadTexture(const std::vector<u8>& staging_buffer) override; @@ -101,7 +97,6 @@ private: VkImageSubresourceRange GetImageSubresourceRange() const; - Core::System& system; const VKDevice& device; VKResourceManager& resource_manager; VKMemoryManager& memory_manager; @@ -201,7 +196,8 @@ private: class VKTextureCache final : public TextureCacheBase { public: - explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 051298cc8..013865aa4 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -377,24 +377,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions, InstanceDispatch& dld) noexcept { - VkApplicationInfo application_info; - application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - application_info.pNext = nullptr; - application_info.pApplicationName = "yuzu Emulator"; - application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.pEngineName = "yuzu Emulator"; - application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.apiVersion = VK_API_VERSION_1_1; - - VkInstanceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.pApplicationInfo = &application_info; - ci.enabledLayerCount = layers.size(); - ci.ppEnabledLayerNames = layers.data(); - ci.enabledExtensionCount = extensions.size(); - ci.ppEnabledExtensionNames = extensions.data(); + static constexpr VkApplicationInfo application_info{ + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = nullptr, + .pApplicationName = "yuzu Emulator", + .applicationVersion = VK_MAKE_VERSION(0, 1, 0), + .pEngineName = "yuzu Emulator", + .engineVersion = VK_MAKE_VERSION(0, 1, 0), + .apiVersion = VK_API_VERSION_1_1, + }; + + const VkInstanceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .pApplicationInfo = &application_info, + .enabledLayerCount = layers.size(), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = extensions.size(), + .ppEnabledExtensionNames = extensions.data(), + }; VkInstance instance; if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { @@ -425,19 +427,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices( DebugCallback Instance::TryCreateDebugCallback( PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - VkDebugUtilsMessengerCreateInfoEXT ci; - ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - ci.pNext = nullptr; - ci.flags = 0; - ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - ci.pfnUserCallback = callback; - ci.pUserData = nullptr; + const VkDebugUtilsMessengerCreateInfoEXT ci{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = callback, + .pUserData = nullptr, + }; VkDebugUtilsMessengerEXT messenger; if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { @@ -468,12 +471,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { - VkCommandBufferAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.commandPool = handle; - ai.level = level; - ai.commandBufferCount = static_cast<u32>(num_buffers); + const VkCommandBufferAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = handle, + .level = level, + .commandBufferCount = static_cast<u32>(num_buffers), + }; std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers); switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { @@ -497,17 +501,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, DeviceDispatch& dld) noexcept { - VkDeviceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - ci.pNext = next; - ci.flags = 0; - ci.queueCreateInfoCount = queues_ci.size(); - ci.pQueueCreateInfos = queues_ci.data(); - ci.enabledLayerCount = 0; - ci.ppEnabledLayerNames = nullptr; - ci.enabledExtensionCount = enabled_extensions.size(); - ci.ppEnabledExtensionNames = enabled_extensions.data(); - ci.pEnabledFeatures = nullptr; + const VkDeviceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = next, + .flags = 0, + .queueCreateInfoCount = queues_ci.size(), + .pQueueCreateInfos = queues_ci.data(), + .enabledLayerCount = 0, + .ppEnabledLayerNames = nullptr, + .enabledExtensionCount = enabled_extensions.size(), + .ppEnabledExtensionNames = enabled_extensions.data(), + .pEnabledFeatures = nullptr, + }; VkDevice device; if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { @@ -548,10 +553,11 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { } Semaphore Device::CreateSemaphore() const { - VkSemaphoreCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkSemaphoreCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; VkSemaphore object; Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); @@ -638,11 +644,13 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons return ShaderModule(object, handle, *dld); } -Event Device::CreateEvent() const { - VkEventCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; +Event Device::CreateNewEvent() const { + static constexpr VkEventCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + VkEvent object; Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); return Event(object, handle, *dld); @@ -778,7 +786,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp VK_SUCCESS) { return std::nullopt; } - return properties; + return std::move(properties); } std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 71daac9d7..b9d3fedc1 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -721,7 +721,7 @@ public: ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; - Event CreateEvent() const; + Event CreateNewEvent() const; SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; @@ -756,8 +756,8 @@ public: } VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, - void* data, VkDeviceSize stride, VkQueryResultFlags flags) const - noexcept { + void* data, VkDeviceSize stride, + VkQueryResultFlags flags) const noexcept { return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, flags); } @@ -849,8 +849,8 @@ public: dld->vkCmdBindPipeline(handle, bind_point, pipeline); } - void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const - noexcept { + void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, + VkIndexType index_type) const noexcept { dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type); } @@ -863,8 +863,8 @@ public: BindVertexBuffers(binding, 1, &buffer, &offset); } - void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const - noexcept { + void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, + u32 first_instance) const noexcept { dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance); } @@ -874,15 +874,15 @@ public: first_instance); } - void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const - noexcept { + void ClearAttachments(Span<VkClearAttachment> attachments, + Span<VkClearRect> rects) const noexcept { dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), rects.data()); } void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, - VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const - noexcept { + VkImageLayout dst_layout, Span<VkImageBlit> regions, + VkFilter filter) const noexcept { dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), regions.data(), filter); } @@ -907,8 +907,8 @@ public: regions.data()); } - void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const - noexcept { + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + Span<VkBufferCopy> regions) const noexcept { dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data()); } @@ -924,8 +924,8 @@ public: regions.data()); } - void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const - noexcept { + void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, + u32 data) const noexcept { dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); } diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..aabd62c5c --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp @@ -0,0 +1,221 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <condition_variable> +#include <mutex> +#include <thread> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/shader/async_shaders.h" + +namespace VideoCommon::Shader { + +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} + +AsyncShaders::~AsyncShaders() { + KillWorkers(); +} + +void AsyncShaders::AllocateWorkers() { + // Max worker threads we should allow + constexpr u32 MAX_THREADS = 4; + // Deduce how many threads we can use + const u32 threads_used = std::thread::hardware_concurrency() / 4; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1U, threads_used); + // Don't use more than MAX_THREADS + const auto num_workers = std::min(max_worker_count, MAX_THREADS); + + // If we already have workers queued, ignore + if (num_workers == worker_threads.size()) { + return; + } + + // If workers already exist, clear them + if (!worker_threads.empty()) { + FreeWorkers(); + } + + // Create workers + for (std::size_t i = 0; i < num_workers; i++) { + context_list.push_back(emu_window.CreateSharedContext()); + worker_threads.push_back( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())); + } +} + +void AsyncShaders::FreeWorkers() { + // Mark all threads to quit + is_thread_exiting.store(true); + cv.notify_all(); + for (auto& thread : worker_threads) { + thread.join(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +void AsyncShaders::KillWorkers() { + is_thread_exiting.store(true); + for (auto& thread : worker_threads) { + thread.detach(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +bool AsyncShaders::HasWorkQueued() const { + return !pending_queue.empty(); +} + +bool AsyncShaders::HasCompletedWork() const { + std::shared_lock lock{completed_mutex}; + return !finished_work.empty(); +} + +bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { + const auto& regs = gpu.Maxwell3D().regs; + + // If something is using depth, we can assume that games are not rendering anything which will + // be used one time. + if (regs.zeta_enable) { + return true; + } + + // If games are using a small index count, we can assume these are full screen quads. Usually + // these shaders are only used once for building textures so we can assume they can't be built + // async + if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { + return false; + } + + return true; +} + +std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { + std::vector<Result> results; + { + std::unique_lock lock{completed_mutex}; + results.assign(std::make_move_iterator(finished_work.begin()), + std::make_move_iterator(finished_work.end())); + finished_work.clear(); + } + return results; +} + +void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, + Tegra::Engines::ShaderType shader_type, u64 uid, + std::vector<u64> code, std::vector<u64> code_b, + u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, + VAddr cpu_addr) { + WorkerParams params{ + .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, + .device = &device, + .shader_type = shader_type, + .uid = uid, + .code = std::move(code), + .code_b = std::move(code_b), + .main_offset = main_offset, + .compiler_settings = compiler_settings, + .registry = registry, + .cpu_address = cpu_addr, + }; + std::unique_lock lock(queue_mutex); + pending_queue.push(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, + const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, + Vulkan::GraphicsPipelineCacheKey key) { + WorkerParams params{ + .backend = Backend::Vulkan, + .pp_cache = pp_cache, + .vk_device = &device, + .scheduler = &scheduler, + .descriptor_pool = &descriptor_pool, + .update_descriptor_queue = &update_descriptor_queue, + .renderpass_cache = &renderpass_cache, + .bindings = bindings, + .program = program, + .key = key, + }; + + std::unique_lock lock(queue_mutex); + pending_queue.push(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { + while (!is_thread_exiting.load(std::memory_order_relaxed)) { + std::unique_lock lock{queue_mutex}; + cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); + if (is_thread_exiting) { + return; + } + + // Partial lock to allow all threads to read at the same time + if (!HasWorkQueued()) { + continue; + } + // Another thread beat us, just unlock and wait for the next load + if (pending_queue.empty()) { + continue; + } + + // Pull work from queue + WorkerParams work = std::move(pending_queue.front()); + pending_queue.pop(); + lock.unlock(); + + if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); + const auto scope = context->Acquire(); + auto program = + OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); + Result result{}; + result.backend = work.backend; + result.cpu_address = work.cpu_address; + result.uid = work.uid; + result.code = std::move(work.code); + result.code_b = std::move(work.code_b); + result.shader_type = work.shader_type; + + if (work.backend == Backend::OpenGL) { + result.program.opengl = std::move(program->source_program); + } else if (work.backend == Backend::GLASM) { + result.program.glasm = std::move(program->assembly_program); + } + + { + std::unique_lock complete_lock(completed_mutex); + finished_work.push_back(std::move(result)); + } + } else if (work.backend == Backend::Vulkan) { + auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( + *work.vk_device, *work.scheduler, *work.descriptor_pool, + *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, + work.program); + + work.pp_cache->EmplacePipeline(std::move(pipeline)); + } + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..7cf8d994c --- /dev/null +++ b/src/video_core/shader/async_shaders.h @@ -0,0 +1,136 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <condition_variable> +#include <memory> +#include <shared_mutex> +#include <thread> + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Core::Frontend { +class EmuWindow; +class GraphicsContext; +} // namespace Core::Frontend + +namespace Tegra { +class GPU; +} + +namespace Vulkan { +class VKPipelineCache; +} + +namespace VideoCommon::Shader { + +class AsyncShaders { +public: + enum class Backend { + OpenGL, + GLASM, + Vulkan, + }; + + struct ResultPrograms { + OpenGL::OGLProgram opengl; + OpenGL::OGLAssemblyProgram glasm; + }; + + struct Result { + u64 uid; + VAddr cpu_address; + Backend backend; + ResultPrograms program; + std::vector<u64> code; + std::vector<u64> code_b; + Tegra::Engines::ShaderType shader_type; + }; + + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + ~AsyncShaders(); + + /// Start up shader worker threads + void AllocateWorkers(); + + /// Clear the shader queue and kill all worker threads + void FreeWorkers(); + + // Force end all threads + void KillWorkers(); + + /// Check to see if any shaders have actually been compiled + [[nodiscard]] bool HasCompletedWork() const; + + /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build + /// every shader async as some shaders are only built and executed once. We try to "guess" which + /// shader would be used only once + [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; + + /// Pulls completed compiled shaders + [[nodiscard]] std::vector<Result> GetCompletedWork(); + + void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, + u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, + CompilerSettings compiler_settings, const Registry& registry, + VAddr cpu_addr); + + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, + Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + +private: + void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); + + /// Check our worker queue to see if we have any work queued already + [[nodiscard]] bool HasWorkQueued() const; + + struct WorkerParams { + Backend backend; + // For OGL + const OpenGL::Device* device; + Tegra::Engines::ShaderType shader_type; + u64 uid; + std::vector<u64> code; + std::vector<u64> code_b; + u32 main_offset; + CompilerSettings compiler_settings; + std::optional<Registry> registry; + VAddr cpu_address; + + // For Vulkan + Vulkan::VKPipelineCache* pp_cache; + const Vulkan::VKDevice* vk_device; + Vulkan::VKScheduler* scheduler; + Vulkan::VKDescriptorPool* descriptor_pool; + Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; + Vulkan::VKRenderPassCache* renderpass_cache; + std::vector<VkDescriptorSetLayoutBinding> bindings; + Vulkan::SPIRVProgram program; + Vulkan::GraphicsPipelineCacheKey key; + }; + + std::condition_variable cv; + mutable std::mutex queue_mutex; + mutable std::shared_mutex completed_mutex; + std::atomic<bool> is_thread_exiting{}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; + std::vector<std::thread> worker_threads; + std::queue<WorkerParams> pending_queue; + std::vector<Result> finished_work; + Core::Frontend::EmuWindow& emu_window; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 8d86020f6..336397cdb 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, u64 ldc_tracked_register) { - return TrackInstruction<u64>(state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); + return TrackInstruction<u64>( + state, pos, + [ldc_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::SHL_IMM && + instr.gpr0.Value() == ldc_tracked_register; + }, + [](auto instr, const auto&) { return instr.gpr8.Value(); }); } std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, u64 shl_tracked_register) { - return TrackInstruction<u32>(state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); - }); + return TrackInstruction<u32>( + state, pos, + [shl_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::IMNMX_IMM && + instr.gpr0.Value() == shl_tracked_register; + }, + [](auto instr, const auto&) { + return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); + }); } std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index a041519b7..73155966f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - const Node value = [&]() { - const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); + const Node value = [&] { + Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); if (opcode->get().GetId() != OpCode::Id::IADD3_R) { return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); } - const Node shifted = [&]() { + const Node shifted = [&] { switch (instr.iadd3.mode) { case Tegra::Shader::IAdd3Mode::RightShift: // TODO(tech4me): According to diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 07778dc3e..e75ca4fdb 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, std::size_t component) { const TextureFormat format{descriptor.format}; switch (format) { - case TextureFormat::R16_G16_B16_A16: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32_B32: - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: + case TextureFormat::R16G16B16A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R32G32B32: + case TextureFormat::R32G32: + case TextureFormat::R16G16: case TextureFormat::R32: case TextureFormat::R16: case TextureFormat::R8: @@ -97,7 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, break; case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 0) { return descriptor.b_type; } @@ -108,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, return descriptor.r_type; } break; - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: if (component == 0) { return descriptor.g_type; @@ -137,15 +137,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { u32 GetComponentSize(TextureFormat format, std::size_t component) { switch (format) { - case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32G32B32A32: return 32; - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R16G16B16A16: return 16; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: return component <= 2 ? 32 : 0; - case TextureFormat::R32_G32: + case TextureFormat::R32G32: return component <= 1 ? 32 : 0; - case TextureFormat::R16_G16: + case TextureFormat::R16G16: return component <= 1 ? 16 : 0; case TextureFormat::R32: return component == 0 ? 32 : 0; @@ -192,7 +192,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 6; } return 0; - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 1 || component == 2) { return 11; } @@ -200,7 +200,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 10; } return 0; - case TextureFormat::G8R24: + case TextureFormat::R24G8: if (component == 0) { return 8; } @@ -208,7 +208,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G24R8: + case TextureFormat::R8G24: if (component == 0) { return 8; } @@ -216,7 +216,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G8R8: + case TextureFormat::R8G8: return (component == 0 || component == 1) ? 8 : 0; case TextureFormat::G4R4: return (component == 0 || component == 1) ? 4 : 0; @@ -231,25 +231,25 @@ std::size_t GetImageComponentMask(TextureFormat format) { constexpr u8 B = 0b0100; constexpr u8 A = 0b1000; switch (format) { - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R16G16B16A16: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A4B4G4R4: case TextureFormat::A5B5G5R1: case TextureFormat::A1B5G5R5: return std::size_t{R | G | B | A}; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: case TextureFormat::R32_B24G8: case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: return std::size_t{R | G | B}; - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R32G32: + case TextureFormat::R16G16: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: return std::size_t{R | G}; case TextureFormat::R32: diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 63adbc4a3..e2bba88dd 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); + UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", + static_cast<int>(instr.red.type.Value())); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); if (!real_address || !base_address) { @@ -471,9 +472,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& const auto [base_address, index, offset] = TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT_OR_EXECUTE_MSG(base_address != nullptr, - { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); + ASSERT_OR_EXECUTE_MSG( + base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, + "Global memory tracking failed"); bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index c0a8f233f..29a7cfbfe 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -75,8 +75,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::LaneId: - LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete"); - return Immediate(0U); + return Operation(OperationCode::ThreadId); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 64ba60ea2..1c0957277 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { return pc; } -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, - Tegra::Shader::VideoType type, u64 byte_height) { +Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, + u64 byte_height) { if (!is_chunk) { return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); } - const Node zero = Immediate(0); switch (type) { - case Tegra::Shader::VideoType::Size16_Low: + case VideoType::Size16_Low: return BitfieldExtract(op, 0, 16); - case Tegra::Shader::VideoType::Size16_High: + case VideoType::Size16_High: return BitfieldExtract(op, 16, 16); - case Tegra::Shader::VideoType::Size32: + case VideoType::Size32: // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. UNIMPLEMENTED(); - return zero; - case Tegra::Shader::VideoType::Invalid: + return Immediate(0); + case VideoType::Invalid: UNREACHABLE_MSG("Invalid instruction encoding"); - return zero; + return Immediate(0); default: UNREACHABLE(); - return zero; + return Immediate(0); } } diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index c83dc6615..233b8fa42 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { SetTemporary(bb, 0, product); product = GetTemporary(0); - const Node original_c = op_c; + Node original_c = op_c; const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&]() { + op_c = [&] { switch (set_mode) { case Tegra::Shader::XmadMode::None: return original_c; case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(original_c, 0, 16); + return BitfieldExtract(std::move(original_c), 0, 16); case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(original_c, 16, 16); + return BitfieldExtract(std::move(original_c), 16, 16); case Tegra::Shader::XmadMode::CBcc: { - const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); + Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, + original_b, Immediate(16)); + return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), + std::move(shifted_b)); } case Tegra::Shader::XmadMode::CSfu: { const Node comp_a = diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp index 5071c83ca..e18ccba8e 100644 --- a/src/video_core/shader/memory_util.cpp +++ b/src/video_core/shader/memory_util.cpp @@ -16,11 +16,10 @@ namespace VideoCommon::Shader { -GPUVAddr GetShaderAddress(Core::System& system, +GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& gpu{system.GPU().Maxwell3D()}; - const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; - return gpu.regs.code_address.CodeAddress() + shader_config.offset; + const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; + return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; } bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h index be90d24fd..4624d38e6 100644 --- a/src/video_core/shader/memory_util.h +++ b/src/video_core/shader/memory_util.h @@ -11,10 +11,6 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" -namespace Core { -class System; -} - namespace Tegra { class MemoryManager; } @@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10; constexpr u32 KERNEL_MAIN_OFFSET = 0; /// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Core::System& system, +GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); /// Gets if the current instruction offset is a scheduler instruction diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index e322c3402..29d794b34 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff } Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - const Node node = MakeNode<InternalFlagNode>(flag); + Node node = MakeNode<InternalFlagNode>(flag); if (negated) { - return Operation(OperationCode::LogicalNegate, node); + return Operation(OperationCode::LogicalNegate, std::move(node)); } return node; } diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index b7608fc7b..015a789d6 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -209,11 +209,11 @@ private: } // Remove them from the cache - const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) { + const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) { return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != removed_shaders.end(); }; - storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end()); + std::erase_if(storage, is_removed); } /// @brief Creates a new entry in the lookup cache and returns its pointer diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp new file mode 100644 index 000000000..c3c71657d --- /dev/null +++ b/src/video_core/shader_notify.cpp @@ -0,0 +1,42 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/shader_notify.h" + +using namespace std::chrono_literals; + +namespace VideoCore { +namespace { +constexpr auto UPDATE_TICK = 32ms; +} + +ShaderNotify::ShaderNotify() = default; +ShaderNotify::~ShaderNotify() = default; + +std::size_t ShaderNotify::GetShadersBuilding() { + const auto now = std::chrono::high_resolution_clock::now(); + const auto diff = now - last_update; + if (diff > UPDATE_TICK) { + std::shared_lock lock(mutex); + last_updated_count = accurate_count; + } + return last_updated_count; +} + +std::size_t ShaderNotify::GetShadersBuildingAccurate() { + std::shared_lock lock{mutex}; + return accurate_count; +} + +void ShaderNotify::MarkShaderComplete() { + std::unique_lock lock{mutex}; + accurate_count--; +} + +void ShaderNotify::MarkSharderBuilding() { + std::unique_lock lock{mutex}; + accurate_count++; +} + +} // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h new file mode 100644 index 000000000..a9c92d179 --- /dev/null +++ b/src/video_core/shader_notify.h @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <chrono> +#include <shared_mutex> +#include "common/common_types.h" + +namespace VideoCore { +class ShaderNotify { +public: + ShaderNotify(); + ~ShaderNotify(); + + std::size_t GetShadersBuilding(); + std::size_t GetShadersBuildingAccurate(); + + void MarkShaderComplete(); + void MarkSharderBuilding(); + +private: + std::size_t last_updated_count{}; + std::size_t accurate_count{}; + std::shared_mutex mutex; + std::chrono::high_resolution_clock::time_point last_update{}; +}; +} // namespace VideoCore diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index bbe93903c..1688267bb 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -74,117 +74,131 @@ bool SurfaceTargetIsArray(SurfaceTarget target) { PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { switch (format) { - case Tegra::DepthFormat::S8_Z24_UNORM: - return PixelFormat::S8Z24; - case Tegra::DepthFormat::Z24_S8_UNORM: - return PixelFormat::Z24S8; - case Tegra::DepthFormat::Z32_FLOAT: - return PixelFormat::Z32F; - case Tegra::DepthFormat::Z16_UNORM: - return PixelFormat::Z16; - case Tegra::DepthFormat::Z32_S8_X24_FLOAT: - return PixelFormat::Z32FS8; + case Tegra::DepthFormat::S8_UINT_Z24_UNORM: + return PixelFormat::S8_UINT_D24_UNORM; + case Tegra::DepthFormat::D24S8_UNORM: + return PixelFormat::D24_UNORM_S8_UINT; + case Tegra::DepthFormat::D32_FLOAT: + return PixelFormat::D32_FLOAT; + case Tegra::DepthFormat::D16_UNORM: + return PixelFormat::D16_UNORM; + case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: + return PixelFormat::D32_FLOAT_S8_UINT; default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return PixelFormat::S8Z24; + UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); + return PixelFormat::S8_UINT_D24_UNORM; } } PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { - case Tegra::RenderTargetFormat::RGBA8_SRGB: - return PixelFormat::RGBA8_SRGB; - case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::ABGR8U; - case Tegra::RenderTargetFormat::RGBA8_SNORM: - return PixelFormat::ABGR8S; - case Tegra::RenderTargetFormat::RGBA8_UINT: - return PixelFormat::ABGR8UI; - case Tegra::RenderTargetFormat::BGRA8_SRGB: - return PixelFormat::BGRA8_SRGB; - case Tegra::RenderTargetFormat::BGRA8_UNORM: - return PixelFormat::BGRA8; - case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return PixelFormat::A2B10G10R10U; - case Tegra::RenderTargetFormat::RGBA16_FLOAT: - return PixelFormat::RGBA16F; - case Tegra::RenderTargetFormat::RGBA16_UNORM: - return PixelFormat::RGBA16U; - case Tegra::RenderTargetFormat::RGBA16_SNORM: - return PixelFormat::RGBA16S; - case Tegra::RenderTargetFormat::RGBA16_UINT: - return PixelFormat::RGBA16UI; - case Tegra::RenderTargetFormat::RGBA32_FLOAT: - return PixelFormat::RGBA32F; - case Tegra::RenderTargetFormat::RG32_FLOAT: - return PixelFormat::RG32F; - case Tegra::RenderTargetFormat::R11G11B10_FLOAT: - return PixelFormat::R11FG11FB10F; - case Tegra::RenderTargetFormat::B5G6R5_UNORM: - return PixelFormat::B5G6R5U; - case Tegra::RenderTargetFormat::BGR5A1_UNORM: - return PixelFormat::A1B5G5R5U; - case Tegra::RenderTargetFormat::RGBA32_UINT: - return PixelFormat::RGBA32UI; - case Tegra::RenderTargetFormat::R8_UNORM: - return PixelFormat::R8U; - case Tegra::RenderTargetFormat::R8_UINT: - return PixelFormat::R8UI; - case Tegra::RenderTargetFormat::RG16_FLOAT: - return PixelFormat::RG16F; - case Tegra::RenderTargetFormat::RG16_UINT: - return PixelFormat::RG16UI; - case Tegra::RenderTargetFormat::RG16_SINT: - return PixelFormat::RG16I; - case Tegra::RenderTargetFormat::RG16_UNORM: - return PixelFormat::RG16; - case Tegra::RenderTargetFormat::RG16_SNORM: - return PixelFormat::RG16S; - case Tegra::RenderTargetFormat::RG8_UNORM: - return PixelFormat::RG8U; - case Tegra::RenderTargetFormat::RG8_SNORM: - return PixelFormat::RG8S; - case Tegra::RenderTargetFormat::RG8_UINT: - return PixelFormat::RG8UI; - case Tegra::RenderTargetFormat::R16_FLOAT: - return PixelFormat::R16F; + case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT: + return PixelFormat::R32G32B32A32_FLOAT; + case Tegra::RenderTargetFormat::R32G32B32A32_SINT: + return PixelFormat::R32G32B32A32_SINT; + case Tegra::RenderTargetFormat::R32G32B32A32_UINT: + return PixelFormat::R32G32B32A32_UINT; + case Tegra::RenderTargetFormat::R16G16B16A16_UNORM: + return PixelFormat::R16G16B16A16_UNORM; + case Tegra::RenderTargetFormat::R16G16B16A16_SNORM: + return PixelFormat::R16G16B16A16_SNORM; + case Tegra::RenderTargetFormat::R16G16B16A16_SINT: + return PixelFormat::R16G16B16A16_SINT; + case Tegra::RenderTargetFormat::R16G16B16A16_UINT: + return PixelFormat::R16G16B16A16_UINT; + case Tegra::RenderTargetFormat::R16G16B16A16_FLOAT: + return PixelFormat::R16G16B16A16_FLOAT; + case Tegra::RenderTargetFormat::R32G32_FLOAT: + return PixelFormat::R32G32_FLOAT; + case Tegra::RenderTargetFormat::R32G32_SINT: + return PixelFormat::R32G32_SINT; + case Tegra::RenderTargetFormat::R32G32_UINT: + return PixelFormat::R32G32_UINT; + case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT: + return PixelFormat::R16G16B16X16_FLOAT; + case Tegra::RenderTargetFormat::B8G8R8A8_UNORM: + return PixelFormat::B8G8R8A8_UNORM; + case Tegra::RenderTargetFormat::B8G8R8A8_SRGB: + return PixelFormat::B8G8R8A8_SRGB; + case Tegra::RenderTargetFormat::A2B10G10R10_UNORM: + return PixelFormat::A2B10G10R10_UNORM; + case Tegra::RenderTargetFormat::A2B10G10R10_UINT: + return PixelFormat::A2B10G10R10_UINT; + case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: + return PixelFormat::A8B8G8R8_UNORM; + case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: + return PixelFormat::A8B8G8R8_SRGB; + case Tegra::RenderTargetFormat::A8B8G8R8_SNORM: + return PixelFormat::A8B8G8R8_SNORM; + case Tegra::RenderTargetFormat::A8B8G8R8_SINT: + return PixelFormat::A8B8G8R8_SINT; + case Tegra::RenderTargetFormat::A8B8G8R8_UINT: + return PixelFormat::A8B8G8R8_UINT; + case Tegra::RenderTargetFormat::R16G16_UNORM: + return PixelFormat::R16G16_UNORM; + case Tegra::RenderTargetFormat::R16G16_SNORM: + return PixelFormat::R16G16_SNORM; + case Tegra::RenderTargetFormat::R16G16_SINT: + return PixelFormat::R16G16_SINT; + case Tegra::RenderTargetFormat::R16G16_UINT: + return PixelFormat::R16G16_UINT; + case Tegra::RenderTargetFormat::R16G16_FLOAT: + return PixelFormat::R16G16_FLOAT; + case Tegra::RenderTargetFormat::B10G11R11_FLOAT: + return PixelFormat::B10G11R11_FLOAT; + case Tegra::RenderTargetFormat::R32_SINT: + return PixelFormat::R32_SINT; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32_UINT; + case Tegra::RenderTargetFormat::R32_FLOAT: + return PixelFormat::R32_FLOAT; + case Tegra::RenderTargetFormat::R5G6B5_UNORM: + return PixelFormat::R5G6B5_UNORM; + case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: + return PixelFormat::A1R5G5B5_UNORM; + case Tegra::RenderTargetFormat::R8G8_UNORM: + return PixelFormat::R8G8_UNORM; + case Tegra::RenderTargetFormat::R8G8_SNORM: + return PixelFormat::R8G8_SNORM; + case Tegra::RenderTargetFormat::R8G8_SINT: + return PixelFormat::R8G8_SINT; + case Tegra::RenderTargetFormat::R8G8_UINT: + return PixelFormat::R8G8_UINT; case Tegra::RenderTargetFormat::R16_UNORM: - return PixelFormat::R16U; + return PixelFormat::R16_UNORM; case Tegra::RenderTargetFormat::R16_SNORM: - return PixelFormat::R16S; - case Tegra::RenderTargetFormat::R16_UINT: - return PixelFormat::R16UI; + return PixelFormat::R16_SNORM; case Tegra::RenderTargetFormat::R16_SINT: - return PixelFormat::R16I; - case Tegra::RenderTargetFormat::R32_FLOAT: - return PixelFormat::R32F; - case Tegra::RenderTargetFormat::R32_SINT: - return PixelFormat::R32I; - case Tegra::RenderTargetFormat::R32_UINT: - return PixelFormat::R32UI; - case Tegra::RenderTargetFormat::RG32_UINT: - return PixelFormat::RG32UI; - case Tegra::RenderTargetFormat::RGBX16_FLOAT: - return PixelFormat::RGBX16F; + return PixelFormat::R16_SINT; + case Tegra::RenderTargetFormat::R16_UINT: + return PixelFormat::R16_UINT; + case Tegra::RenderTargetFormat::R16_FLOAT: + return PixelFormat::R16_FLOAT; + case Tegra::RenderTargetFormat::R8_UNORM: + return PixelFormat::R8_UNORM; + case Tegra::RenderTargetFormat::R8_SNORM: + return PixelFormat::R8_SNORM; + case Tegra::RenderTargetFormat::R8_SINT: + return PixelFormat::R8_SINT; + case Tegra::RenderTargetFormat::R8_UINT: + return PixelFormat::R8_UINT; default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return PixelFormat::RGBA8_SRGB; + UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<int>(format)); + return PixelFormat::A8B8G8R8_UNORM; } } PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8U; - case Tegra::FramebufferConfig::PixelFormat::RGB565: - return PixelFormat::B5G6R5U; - case Tegra::FramebufferConfig::PixelFormat::BGRA8: - return PixelFormat::BGRA8; + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: + return PixelFormat::A8B8G8R8_UNORM; + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: + return PixelFormat::R5G6B5_UNORM; + case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM: + return PixelFormat::B8G8R8A8_UNORM; default: UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); - return PixelFormat::ABGR8U; + return PixelFormat::A8B8G8R8_UNORM; } } @@ -212,27 +226,27 @@ SurfaceType GetFormatType(PixelFormat pixel_format) { bool IsPixelFormatASTC(PixelFormat format) { switch (format) { - case PixelFormat::ASTC_2D_4X4: - case PixelFormat::ASTC_2D_5X4: - case PixelFormat::ASTC_2D_5X5: - case PixelFormat::ASTC_2D_8X8: - case PixelFormat::ASTC_2D_8X5: + case PixelFormat::ASTC_2D_4X4_UNORM: + case PixelFormat::ASTC_2D_5X4_UNORM: + case PixelFormat::ASTC_2D_5X5_UNORM: + case PixelFormat::ASTC_2D_8X8_UNORM: + case PixelFormat::ASTC_2D_8X5_UNORM: case PixelFormat::ASTC_2D_4X4_SRGB: case PixelFormat::ASTC_2D_5X4_SRGB: case PixelFormat::ASTC_2D_5X5_SRGB: case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: - case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_UNORM: case PixelFormat::ASTC_2D_10X8_SRGB: - case PixelFormat::ASTC_2D_6X6: + case PixelFormat::ASTC_2D_6X6_UNORM: case PixelFormat::ASTC_2D_6X6_SRGB: - case PixelFormat::ASTC_2D_10X10: + case PixelFormat::ASTC_2D_10X10_UNORM: case PixelFormat::ASTC_2D_10X10_SRGB: - case PixelFormat::ASTC_2D_12X12: + case PixelFormat::ASTC_2D_12X12_UNORM: case PixelFormat::ASTC_2D_12X12_SRGB: - case PixelFormat::ASTC_2D_8X6: + case PixelFormat::ASTC_2D_8X6_UNORM: case PixelFormat::ASTC_2D_8X6_SRGB: - case PixelFormat::ASTC_2D_6X5: + case PixelFormat::ASTC_2D_6X5_UNORM: case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: @@ -242,12 +256,12 @@ bool IsPixelFormatASTC(PixelFormat format) { bool IsPixelFormatSRGB(PixelFormat format) { switch (format) { - case PixelFormat::RGBA8_SRGB: - case PixelFormat::BGRA8_SRGB: - case PixelFormat::DXT1_SRGB: - case PixelFormat::DXT23_SRGB: - case PixelFormat::DXT45_SRGB: - case PixelFormat::BC7U_SRGB: + case PixelFormat::A8B8G8R8_SRGB: + case PixelFormat::B8G8R8A8_SRGB: + case PixelFormat::BC1_RGBA_SRGB: + case PixelFormat::BC2_SRGB: + case PixelFormat::BC3_SRGB: + case PixelFormat::BC7_SRGB: case PixelFormat::ASTC_2D_4X4_SRGB: case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: @@ -269,25 +283,4 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; } -bool IsFormatBCn(PixelFormat format) { - switch (format) { - case PixelFormat::DXT1: - case PixelFormat::DXT23: - case PixelFormat::DXT45: - case PixelFormat::DXN1: - case PixelFormat::DXN2SNORM: - case PixelFormat::DXN2UNORM: - case PixelFormat::BC7U: - case PixelFormat::BC6H_UF16: - case PixelFormat::BC6H_SF16: - case PixelFormat::DXT1_SRGB: - case PixelFormat::DXT23_SRGB: - case PixelFormat::DXT45_SRGB: - case PixelFormat::BC7U_SRGB: - return true; - default: - return false; - } -} - } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 6da6a1b97..cfd12fa61 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -15,94 +15,105 @@ namespace VideoCore::Surface { enum class PixelFormat { - ABGR8U = 0, - ABGR8S = 1, - ABGR8UI = 2, - B5G6R5U = 3, - A2B10G10R10U = 4, - A1B5G5R5U = 5, - R8U = 6, - R8UI = 7, - RGBA16F = 8, - RGBA16U = 9, - RGBA16S = 10, - RGBA16UI = 11, - R11FG11FB10F = 12, - RGBA32UI = 13, - DXT1 = 14, - DXT23 = 15, - DXT45 = 16, - DXN1 = 17, // This is also known as BC4 - DXN2UNORM = 18, - DXN2SNORM = 19, - BC7U = 20, - BC6H_UF16 = 21, - BC6H_SF16 = 22, - ASTC_2D_4X4 = 23, - BGRA8 = 24, - RGBA32F = 25, - RG32F = 26, - R32F = 27, - R16F = 28, - R16U = 29, - R16S = 30, - R16UI = 31, - R16I = 32, - RG16 = 33, - RG16F = 34, - RG16UI = 35, - RG16I = 36, - RG16S = 37, - RGB32F = 38, - RGBA8_SRGB = 39, - RG8U = 40, - RG8S = 41, - RG8UI = 42, - RG32UI = 43, - RGBX16F = 44, - R32UI = 45, - R32I = 46, - ASTC_2D_8X8 = 47, - ASTC_2D_8X5 = 48, - ASTC_2D_5X4 = 49, - BGRA8_SRGB = 50, - DXT1_SRGB = 51, - DXT23_SRGB = 52, - DXT45_SRGB = 53, - BC7U_SRGB = 54, - R4G4B4A4U = 55, - ASTC_2D_4X4_SRGB = 56, - ASTC_2D_8X8_SRGB = 57, - ASTC_2D_8X5_SRGB = 58, - ASTC_2D_5X4_SRGB = 59, - ASTC_2D_5X5 = 60, - ASTC_2D_5X5_SRGB = 61, - ASTC_2D_10X8 = 62, - ASTC_2D_10X8_SRGB = 63, - ASTC_2D_6X6 = 64, - ASTC_2D_6X6_SRGB = 65, - ASTC_2D_10X10 = 66, - ASTC_2D_10X10_SRGB = 67, - ASTC_2D_12X12 = 68, - ASTC_2D_12X12_SRGB = 69, - ASTC_2D_8X6 = 70, - ASTC_2D_8X6_SRGB = 71, - ASTC_2D_6X5 = 72, - ASTC_2D_6X5_SRGB = 73, - E5B9G9R9F = 74, + A8B8G8R8_UNORM, + A8B8G8R8_SNORM, + A8B8G8R8_SINT, + A8B8G8R8_UINT, + R5G6B5_UNORM, + B5G6R5_UNORM, + A1R5G5B5_UNORM, + A2B10G10R10_UNORM, + A2B10G10R10_UINT, + A1B5G5R5_UNORM, + R8_UNORM, + R8_SNORM, + R8_SINT, + R8_UINT, + R16G16B16A16_FLOAT, + R16G16B16A16_UNORM, + R16G16B16A16_SNORM, + R16G16B16A16_SINT, + R16G16B16A16_UINT, + B10G11R11_FLOAT, + R32G32B32A32_UINT, + BC1_RGBA_UNORM, + BC2_UNORM, + BC3_UNORM, + BC4_UNORM, + BC4_SNORM, + BC5_UNORM, + BC5_SNORM, + BC7_UNORM, + BC6H_UFLOAT, + BC6H_SFLOAT, + ASTC_2D_4X4_UNORM, + B8G8R8A8_UNORM, + R32G32B32A32_FLOAT, + R32G32B32A32_SINT, + R32G32_FLOAT, + R32G32_SINT, + R32_FLOAT, + R16_FLOAT, + R16_UNORM, + R16_SNORM, + R16_UINT, + R16_SINT, + R16G16_UNORM, + R16G16_FLOAT, + R16G16_UINT, + R16G16_SINT, + R16G16_SNORM, + R32G32B32_FLOAT, + A8B8G8R8_SRGB, + R8G8_UNORM, + R8G8_SNORM, + R8G8_SINT, + R8G8_UINT, + R32G32_UINT, + R16G16B16X16_FLOAT, + R32_UINT, + R32_SINT, + ASTC_2D_8X8_UNORM, + ASTC_2D_8X5_UNORM, + ASTC_2D_5X4_UNORM, + B8G8R8A8_SRGB, + BC1_RGBA_SRGB, + BC2_SRGB, + BC3_SRGB, + BC7_SRGB, + A4B4G4R4_UNORM, + ASTC_2D_4X4_SRGB, + ASTC_2D_8X8_SRGB, + ASTC_2D_8X5_SRGB, + ASTC_2D_5X4_SRGB, + ASTC_2D_5X5_UNORM, + ASTC_2D_5X5_SRGB, + ASTC_2D_10X8_UNORM, + ASTC_2D_10X8_SRGB, + ASTC_2D_6X6_UNORM, + ASTC_2D_6X6_SRGB, + ASTC_2D_10X10_UNORM, + ASTC_2D_10X10_SRGB, + ASTC_2D_12X12_UNORM, + ASTC_2D_12X12_SRGB, + ASTC_2D_8X6_UNORM, + ASTC_2D_8X6_SRGB, + ASTC_2D_6X5_UNORM, + ASTC_2D_6X5_SRGB, + E5B9G9R9_FLOAT, MaxColorFormat, // Depth formats - Z32F = 75, - Z16 = 76, + D32_FLOAT = MaxColorFormat, + D16_UNORM, MaxDepthFormat, // DepthStencil formats - Z24S8 = 77, - S8Z24 = 78, - Z32FS8 = 79, + D24_UNORM_S8_UINT = MaxDepthFormat, + S8_UINT_D24_UNORM, + D32_FLOAT_S8_UINT, MaxDepthStencilFormat, @@ -130,86 +141,97 @@ enum class SurfaceTarget { }; constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ - 0, // ABGR8U - 0, // ABGR8S - 0, // ABGR8UI - 0, // B5G6R5U - 0, // A2B10G10R10U - 0, // A1B5G5R5U - 0, // R8U - 0, // R8UI - 0, // RGBA16F - 0, // RGBA16U - 0, // RGBA16S - 0, // RGBA16UI - 0, // R11FG11FB10F - 0, // RGBA32UI - 2, // DXT1 - 2, // DXT23 - 2, // DXT45 - 2, // DXN1 - 2, // DXN2UNORM - 2, // DXN2SNORM - 2, // BC7U - 2, // BC6H_UF16 - 2, // BC6H_SF16 - 2, // ASTC_2D_4X4 - 0, // BGRA8 - 0, // RGBA32F - 0, // RG32F - 0, // R32F - 0, // R16F - 0, // R16U - 0, // R16S - 0, // R16UI - 0, // R16I - 0, // RG16 - 0, // RG16F - 0, // RG16UI - 0, // RG16I - 0, // RG16S - 0, // RGB32F - 0, // RGBA8_SRGB - 0, // RG8U - 0, // RG8S - 0, // RG8UI - 0, // RG32UI - 0, // RGBX16F - 0, // R32UI - 0, // R32I - 2, // ASTC_2D_8X8 - 2, // ASTC_2D_8X5 - 2, // ASTC_2D_5X4 - 0, // BGRA8_SRGB - 2, // DXT1_SRGB - 2, // DXT23_SRGB - 2, // DXT45_SRGB - 2, // BC7U_SRGB - 0, // R4G4B4A4U + 0, // A8B8G8R8_UNORM + 0, // A8B8G8R8_SNORM + 0, // A8B8G8R8_SINT + 0, // A8B8G8R8_UINT + 0, // R5G6B5_UNORM + 0, // B5G6R5_UNORM + 0, // A1R5G5B5_UNORM + 0, // A2B10G10R10_UNORM + 0, // A2B10G10R10_UINT + 0, // A1B5G5R5_UNORM + 0, // R8_UNORM + 0, // R8_SNORM + 0, // R8_SINT + 0, // R8_UINT + 0, // R16G16B16A16_FLOAT + 0, // R16G16B16A16_UNORM + 0, // R16G16B16A16_SNORM + 0, // R16G16B16A16_SINT + 0, // R16G16B16A16_UINT + 0, // B10G11R11_FLOAT + 0, // R32G32B32A32_UINT + 2, // BC1_RGBA_UNORM + 2, // BC2_UNORM + 2, // BC3_UNORM + 2, // BC4_UNORM + 2, // BC4_SNORM + 2, // BC5_UNORM + 2, // BC5_SNORM + 2, // BC7_UNORM + 2, // BC6H_UFLOAT + 2, // BC6H_SFLOAT + 2, // ASTC_2D_4X4_UNORM + 0, // B8G8R8A8_UNORM + 0, // R32G32B32A32_FLOAT + 0, // R32G32B32A32_SINT + 0, // R32G32_FLOAT + 0, // R32G32_SINT + 0, // R32_FLOAT + 0, // R16_FLOAT + 0, // R16_UNORM + 0, // R16_SNORM + 0, // R16_UINT + 0, // R16_SINT + 0, // R16G16_UNORM + 0, // R16G16_FLOAT + 0, // R16G16_UINT + 0, // R16G16_SINT + 0, // R16G16_SNORM + 0, // R32G32B32_FLOAT + 0, // A8B8G8R8_SRGB + 0, // R8G8_UNORM + 0, // R8G8_SNORM + 0, // R8G8_SINT + 0, // R8G8_UINT + 0, // R32G32_UINT + 0, // R16G16B16X16_FLOAT + 0, // R32_UINT + 0, // R32_SINT + 2, // ASTC_2D_8X8_UNORM + 2, // ASTC_2D_8X5_UNORM + 2, // ASTC_2D_5X4_UNORM + 0, // B8G8R8A8_SRGB + 2, // BC1_RGBA_SRGB + 2, // BC2_SRGB + 2, // BC3_SRGB + 2, // BC7_SRGB + 0, // A4B4G4R4_UNORM 2, // ASTC_2D_4X4_SRGB 2, // ASTC_2D_8X8_SRGB 2, // ASTC_2D_8X5_SRGB 2, // ASTC_2D_5X4_SRGB - 2, // ASTC_2D_5X5 + 2, // ASTC_2D_5X5_UNORM 2, // ASTC_2D_5X5_SRGB - 2, // ASTC_2D_10X8 + 2, // ASTC_2D_10X8_UNORM 2, // ASTC_2D_10X8_SRGB - 2, // ASTC_2D_6X6 + 2, // ASTC_2D_6X6_UNORM 2, // ASTC_2D_6X6_SRGB - 2, // ASTC_2D_10X10 + 2, // ASTC_2D_10X10_UNORM 2, // ASTC_2D_10X10_SRGB - 2, // ASTC_2D_12X12 + 2, // ASTC_2D_12X12_UNORM 2, // ASTC_2D_12X12_SRGB - 2, // ASTC_2D_8X6 + 2, // ASTC_2D_8X6_UNORM 2, // ASTC_2D_8X6_SRGB - 2, // ASTC_2D_6X5 + 2, // ASTC_2D_6X5_UNORM 2, // ASTC_2D_6X5_SRGB - 0, // E5B9G9R9F - 0, // Z32F - 0, // Z16 - 0, // Z24S8 - 0, // S8Z24 - 0, // Z32FS8 + 0, // E5B9G9R9_FLOAT + 0, // D32_FLOAT + 0, // D16_UNORM + 0, // D24_UNORM_S8_UINT + 0, // S8_UINT_D24_UNORM + 0, // D32_FLOAT_S8_UINT }}; /** @@ -229,86 +251,97 @@ inline constexpr u32 GetCompressionFactor(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16S - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG8UI - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 1, // R32I - 8, // ASTC_2D_8X8 - 8, // ASTC_2D_8X5 - 5, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 1, // R4G4B4A4U + 1, // A8B8G8R8_UNORM + 1, // A8B8G8R8_SNORM + 1, // A8B8G8R8_SINT + 1, // A8B8G8R8_UINT + 1, // R5G6B5_UNORM + 1, // B5G6R5_UNORM + 1, // A1R5G5B5_UNORM + 1, // A2B10G10R10_UNORM + 1, // A2B10G10R10_UINT + 1, // A1B5G5R5_UNORM + 1, // R8_UNORM + 1, // R8_SNORM + 1, // R8_SINT + 1, // R8_UINT + 1, // R16G16B16A16_FLOAT + 1, // R16G16B16A16_UNORM + 1, // R16G16B16A16_SNORM + 1, // R16G16B16A16_SINT + 1, // R16G16B16A16_UINT + 1, // B10G11R11_FLOAT + 1, // R32G32B32A32_UINT + 4, // BC1_RGBA_UNORM + 4, // BC2_UNORM + 4, // BC3_UNORM + 4, // BC4_UNORM + 4, // BC4_SNORM + 4, // BC5_UNORM + 4, // BC5_SNORM + 4, // BC7_UNORM + 4, // BC6H_UFLOAT + 4, // BC6H_SFLOAT + 4, // ASTC_2D_4X4_UNORM + 1, // B8G8R8A8_UNORM + 1, // R32G32B32A32_FLOAT + 1, // R32G32B32A32_SINT + 1, // R32G32_FLOAT + 1, // R32G32_SINT + 1, // R32_FLOAT + 1, // R16_FLOAT + 1, // R16_UNORM + 1, // R16_SNORM + 1, // R16_UINT + 1, // R16_SINT + 1, // R16G16_UNORM + 1, // R16G16_FLOAT + 1, // R16G16_UINT + 1, // R16G16_SINT + 1, // R16G16_SNORM + 1, // R32G32B32_FLOAT + 1, // A8B8G8R8_SRGB + 1, // R8G8_UNORM + 1, // R8G8_SNORM + 1, // R8G8_SINT + 1, // R8G8_UINT + 1, // R32G32_UINT + 1, // R16G16B16X16_FLOAT + 1, // R32_UINT + 1, // R32_SINT + 8, // ASTC_2D_8X8_UNORM + 8, // ASTC_2D_8X5_UNORM + 5, // ASTC_2D_5X4_UNORM + 1, // B8G8R8A8_SRGB + 4, // BC1_RGBA_SRGB + 4, // BC2_SRGB + 4, // BC3_SRGB + 4, // BC7_SRGB + 1, // A4B4G4R4_UNORM 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 8, // ASTC_2D_8X5_SRGB 5, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_UNORM 5, // ASTC_2D_5X5_SRGB - 10, // ASTC_2D_10X8 + 10, // ASTC_2D_10X8_UNORM 10, // ASTC_2D_10X8_SRGB - 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_UNORM 6, // ASTC_2D_6X6_SRGB - 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_UNORM 10, // ASTC_2D_10X10_SRGB - 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_UNORM 12, // ASTC_2D_12X12_SRGB - 8, // ASTC_2D_8X6 + 8, // ASTC_2D_8X6_UNORM 8, // ASTC_2D_8X6_SRGB - 6, // ASTC_2D_6X5 + 6, // ASTC_2D_6X5_UNORM 6, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9F - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // E5B9G9R9_FLOAT + 1, // D32_FLOAT + 1, // D16_UNORM + 1, // D24_UNORM_S8_UINT + 1, // S8_UINT_D24_UNORM + 1, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { @@ -320,86 +353,97 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16S - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG8UI - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 1, // R32I - 8, // ASTC_2D_8X8 - 5, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 1, // R4G4B4A4U + 1, // A8B8G8R8_UNORM + 1, // A8B8G8R8_SNORM + 1, // A8B8G8R8_SINT + 1, // A8B8G8R8_UINT + 1, // R5G6B5_UNORM + 1, // B5G6R5_UNORM + 1, // A1R5G5B5_UNORM + 1, // A2B10G10R10_UNORM + 1, // A2B10G10R10_UINT + 1, // A1B5G5R5_UNORM + 1, // R8_UNORM + 1, // R8_SNORM + 1, // R8_SINT + 1, // R8_UINT + 1, // R16G16B16A16_FLOAT + 1, // R16G16B16A16_UNORM + 1, // R16G16B16A16_SNORM + 1, // R16G16B16A16_SINT + 1, // R16G16B16A16_UINT + 1, // B10G11R11_FLOAT + 1, // R32G32B32A32_UINT + 4, // BC1_RGBA_UNORM + 4, // BC2_UNORM + 4, // BC3_UNORM + 4, // BC4_UNORM + 4, // BC4_SNORM + 4, // BC5_UNORM + 4, // BC5_SNORM + 4, // BC7_UNORM + 4, // BC6H_UFLOAT + 4, // BC6H_SFLOAT + 4, // ASTC_2D_4X4_UNORM + 1, // B8G8R8A8_UNORM + 1, // R32G32B32A32_FLOAT + 1, // R32G32B32A32_SINT + 1, // R32G32_FLOAT + 1, // R32G32_SINT + 1, // R32_FLOAT + 1, // R16_FLOAT + 1, // R16_UNORM + 1, // R16_SNORM + 1, // R16_UINT + 1, // R16_SINT + 1, // R16G16_UNORM + 1, // R16G16_FLOAT + 1, // R16G16_UINT + 1, // R16G16_SINT + 1, // R16G16_SNORM + 1, // R32G32B32_FLOAT + 1, // A8B8G8R8_SRGB + 1, // R8G8_UNORM + 1, // R8G8_SNORM + 1, // R8G8_SINT + 1, // R8G8_UINT + 1, // R32G32_UINT + 1, // R16G16B16X16_FLOAT + 1, // R32_UINT + 1, // R32_SINT + 8, // ASTC_2D_8X8_UNORM + 5, // ASTC_2D_8X5_UNORM + 4, // ASTC_2D_5X4_UNORM + 1, // B8G8R8A8_SRGB + 4, // BC1_RGBA_SRGB + 4, // BC2_SRGB + 4, // BC3_SRGB + 4, // BC7_SRGB + 1, // A4B4G4R4_UNORM 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 5, // ASTC_2D_8X5_SRGB 4, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_UNORM 5, // ASTC_2D_5X5_SRGB - 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_UNORM 8, // ASTC_2D_10X8_SRGB - 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_UNORM 6, // ASTC_2D_6X6_SRGB - 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_UNORM 10, // ASTC_2D_10X10_SRGB - 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_UNORM 12, // ASTC_2D_12X12_SRGB - 6, // ASTC_2D_8X6 + 6, // ASTC_2D_8X6_UNORM 6, // ASTC_2D_8X6_SRGB - 5, // ASTC_2D_6X5 + 5, // ASTC_2D_6X5_UNORM 5, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9F - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // E5B9G9R9_FLOAT + 1, // D32_FLOAT + 1, // D16_UNORM + 1, // D24_UNORM_S8_UINT + 1, // S8_UINT_D24_UNORM + 1, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { @@ -411,86 +455,97 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8U - 32, // ABGR8S - 32, // ABGR8UI - 16, // B5G6R5U - 32, // A2B10G10R10U - 16, // A1B5G5R5U - 8, // R8U - 8, // R8UI - 64, // RGBA16F - 64, // RGBA16U - 64, // RGBA16S - 64, // RGBA16UI - 32, // R11FG11FB10F - 128, // RGBA32UI - 64, // DXT1 - 128, // DXT23 - 128, // DXT45 - 64, // DXN1 - 128, // DXN2UNORM - 128, // DXN2SNORM - 128, // BC7U - 128, // BC6H_UF16 - 128, // BC6H_SF16 - 128, // ASTC_2D_4X4 - 32, // BGRA8 - 128, // RGBA32F - 64, // RG32F - 32, // R32F - 16, // R16F - 16, // R16U - 16, // R16S - 16, // R16UI - 16, // R16I - 32, // RG16 - 32, // RG16F - 32, // RG16UI - 32, // RG16I - 32, // RG16S - 96, // RGB32F - 32, // RGBA8_SRGB - 16, // RG8U - 16, // RG8S - 16, // RG8UI - 64, // RG32UI - 64, // RGBX16F - 32, // R32UI - 32, // R32I - 128, // ASTC_2D_8X8 - 128, // ASTC_2D_8X5 - 128, // ASTC_2D_5X4 - 32, // BGRA8_SRGB - 64, // DXT1_SRGB - 128, // DXT23_SRGB - 128, // DXT45_SRGB - 128, // BC7U - 16, // R4G4B4A4U + 32, // A8B8G8R8_UNORM + 32, // A8B8G8R8_SNORM + 32, // A8B8G8R8_SINT + 32, // A8B8G8R8_UINT + 16, // R5G6B5_UNORM + 16, // B5G6R5_UNORM + 16, // A1R5G5B5_UNORM + 32, // A2B10G10R10_UNORM + 32, // A2B10G10R10_UINT + 16, // A1B5G5R5_UNORM + 8, // R8_UNORM + 8, // R8_SNORM + 8, // R8_SINT + 8, // R8_UINT + 64, // R16G16B16A16_FLOAT + 64, // R16G16B16A16_UNORM + 64, // R16G16B16A16_SNORM + 64, // R16G16B16A16_SINT + 64, // R16G16B16A16_UINT + 32, // B10G11R11_FLOAT + 128, // R32G32B32A32_UINT + 64, // BC1_RGBA_UNORM + 128, // BC2_UNORM + 128, // BC3_UNORM + 64, // BC4_UNORM + 64, // BC4_SNORM + 128, // BC5_UNORM + 128, // BC5_SNORM + 128, // BC7_UNORM + 128, // BC6H_UFLOAT + 128, // BC6H_SFLOAT + 128, // ASTC_2D_4X4_UNORM + 32, // B8G8R8A8_UNORM + 128, // R32G32B32A32_FLOAT + 128, // R32G32B32A32_SINT + 64, // R32G32_FLOAT + 64, // R32G32_SINT + 32, // R32_FLOAT + 16, // R16_FLOAT + 16, // R16_UNORM + 16, // R16_SNORM + 16, // R16_UINT + 16, // R16_SINT + 32, // R16G16_UNORM + 32, // R16G16_FLOAT + 32, // R16G16_UINT + 32, // R16G16_SINT + 32, // R16G16_SNORM + 96, // R32G32B32_FLOAT + 32, // A8B8G8R8_SRGB + 16, // R8G8_UNORM + 16, // R8G8_SNORM + 16, // R8G8_SINT + 16, // R8G8_UINT + 64, // R32G32_UINT + 64, // R16G16B16X16_FLOAT + 32, // R32_UINT + 32, // R32_SINT + 128, // ASTC_2D_8X8_UNORM + 128, // ASTC_2D_8X5_UNORM + 128, // ASTC_2D_5X4_UNORM + 32, // B8G8R8A8_SRGB + 64, // BC1_RGBA_SRGB + 128, // BC2_SRGB + 128, // BC3_SRGB + 128, // BC7_UNORM + 16, // A4B4G4R4_UNORM 128, // ASTC_2D_4X4_SRGB 128, // ASTC_2D_8X8_SRGB 128, // ASTC_2D_8X5_SRGB 128, // ASTC_2D_5X4_SRGB - 128, // ASTC_2D_5X5 + 128, // ASTC_2D_5X5_UNORM 128, // ASTC_2D_5X5_SRGB - 128, // ASTC_2D_10X8 + 128, // ASTC_2D_10X8_UNORM 128, // ASTC_2D_10X8_SRGB - 128, // ASTC_2D_6X6 + 128, // ASTC_2D_6X6_UNORM 128, // ASTC_2D_6X6_SRGB - 128, // ASTC_2D_10X10 + 128, // ASTC_2D_10X10_UNORM 128, // ASTC_2D_10X10_SRGB - 128, // ASTC_2D_12X12 + 128, // ASTC_2D_12X12_UNORM 128, // ASTC_2D_12X12_SRGB - 128, // ASTC_2D_8X6 + 128, // ASTC_2D_8X6_UNORM 128, // ASTC_2D_8X6_SRGB - 128, // ASTC_2D_6X5 + 128, // ASTC_2D_6X5_UNORM 128, // ASTC_2D_6X5_SRGB - 32, // E5B9G9R9F - 32, // Z32F - 16, // Z16 - 32, // Z24S8 - 32, // S8Z24 - 64, // Z32FS8 + 32, // E5B9G9R9_FLOAT + 32, // D32_FLOAT + 16, // D16_UNORM + 32, // D24_UNORM_S8_UINT + 32, // S8_UINT_D24_UNORM + 64, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetFormatBpp(PixelFormat format) { @@ -529,7 +584,4 @@ bool IsPixelFormatSRGB(PixelFormat format); std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); -/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN -bool IsFormatBCn(PixelFormat format); - } // namespace VideoCore::Surface diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index f476f03b0..7d5a75648 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -19,8 +19,6 @@ constexpr auto SNORM = ComponentType::SNORM; constexpr auto UNORM = ComponentType::UNORM; constexpr auto SINT = ComponentType::SINT; constexpr auto UINT = ComponentType::UINT; -constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16; -constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16; constexpr auto FLOAT = ComponentType::FLOAT; constexpr bool C = false; // Normal color constexpr bool S = true; // Srgb @@ -41,119 +39,126 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 78> DefinitionTable = {{ - {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, - {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, - {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, - {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB}, +constexpr std::array<Table, 86> DefinitionTable = {{ + {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, + {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, + {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, + {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, + {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, - {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U}, + {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, - {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U}, + {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, + {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, - {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U}, + {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, - {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U}, + {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, - {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U}, - {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI}, + {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, + {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, + {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, + {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, - {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, - {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, - {TextureFormat::G8R8, C, UINT, UINT, UINT, UINT, PixelFormat::RG8UI}, + {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, + {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, + {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, + {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, - {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S}, - {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, - {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, - {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, + {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, + {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, + {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, + {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, + {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, - {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F}, - {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16}, - {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S}, - {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI}, - {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I}, + {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, + {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, + {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, + {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, + {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, - {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F}, - {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U}, - {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S}, - {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI}, - {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I}, + {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, + {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, + {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, + {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, + {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, - {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F}, + {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, - {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F}, - {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI}, + {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, + {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, + {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, - {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F}, + {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, - {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F}, - {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI}, + {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, + {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, + {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, - {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, - {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, - {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I}, + {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, + {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, + {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, - {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, + {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, - {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, - {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, - {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, - {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, - {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, + {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, + {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, + {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, + {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, + {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, - {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, - {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, + {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, + {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, - {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23}, - {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB}, + {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, + {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, - {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45}, - {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB}, + {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, + {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, - // TODO: Use a different pixel format for SNORM - {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1}, - {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1}, + {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, + {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, - {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM}, - {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM}, + {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, + {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, - {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U}, - {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB}, + {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, + {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, - {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16}, - {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16}, + {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, + {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, - {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4}, + {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, - {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4}, + {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, - {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5}, + {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, - {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8}, + {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, - {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5}, + {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, - {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8}, + {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, - {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6}, + {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, - {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10}, + {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, - {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12}, + {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, - {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6}, + {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, - {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5}, + {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, }}; @@ -184,7 +189,7 @@ PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb static_cast<int>(format), is_srgb, static_cast<int>(red_component), static_cast<int>(green_component), static_cast<int>(blue_component), static_cast<int>(alpha_component)); - return PixelFormat::ABGR8U; + return PixelFormat::A8B8G8R8_UNORM; } void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 0caf3b4f0..dfcf36e0b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -228,7 +228,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - if (!is_converted && params.pixel_format != PixelFormat::S8Z24) { + if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { return; } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 0b2b2b8c4..e8515321b 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -74,21 +74,21 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta SurfaceParams params; params.is_tiled = tic.IsTiled(); params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? tic.BlockWidth() : 0, - params.block_height = params.is_tiled ? tic.BlockHeight() : 0, - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, + params.block_width = params.is_tiled ? tic.BlockWidth() : 0; + params.block_height = params.is_tiled ? tic.BlockHeight() : 0; + params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; params.pixel_format = lookup_table.GetPixelFormat( tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); params.type = GetFormatType(params.pixel_format); if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { - case PixelFormat::R16U: - case PixelFormat::R16F: - params.pixel_format = PixelFormat::Z16; + case PixelFormat::R16_UNORM: + case PixelFormat::R16_FLOAT: + params.pixel_format = PixelFormat::D16_UNORM; break; - case PixelFormat::R32F: - params.pixel_format = PixelFormat::Z32F; + case PixelFormat::R32_FLOAT: + params.pixel_format = PixelFormat::D32_FLOAT; break; default: UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", @@ -96,7 +96,6 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta } params.type = GetFormatType(params.pixel_format); } - params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. if (tic.IsBuffer()) { params.target = SurfaceTarget::TextureBuffer; @@ -130,14 +129,13 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl SurfaceParams params; params.is_tiled = tic.IsTiled(); params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? tic.BlockWidth() : 0, - params.block_height = params.is_tiled ? tic.BlockHeight() : 0, - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, + params.block_width = params.is_tiled ? tic.BlockWidth() : 0; + params.block_height = params.is_tiled ? tic.BlockHeight() : 0; + params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; params.pixel_format = lookup_table.GetPixelFormat( tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); params.type = GetFormatType(params.pixel_format); - params.type = GetFormatType(params.pixel_format); params.target = ImageTypeToSurfaceTarget(entry.type); // TODO: on 1DBuffer we should use the tic info. if (tic.IsBuffer()) { @@ -165,38 +163,40 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl return params; } -SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { - const auto& regs = system.GPU().Maxwell3D().regs; - SurfaceParams params; - params.is_tiled = regs.zeta.memory_layout.type == - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.srgb_conversion = false; - params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U); - params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U); - params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); - params.type = GetFormatType(params.pixel_format); - params.width = regs.zeta_width; - params.height = regs.zeta_height; - params.pitch = 0; - params.num_levels = 1; - params.emulated_levels = 1; - - const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0; - params.is_layered = is_layered; - params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; - params.depth = is_layered ? regs.zeta_layers.Value() : 1U; - return params; +SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { + const auto& regs = maxwell3d.regs; + const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); + const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; + const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); + return { + .is_tiled = regs.zeta.memory_layout.type == + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, + .srgb_conversion = false, + .is_layered = is_layered, + .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), + .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U), + .block_depth = block_depth, + .tile_width_spacing = 1, + .width = regs.zeta_width, + .height = regs.zeta_height, + .depth = is_layered ? regs.zeta_layers.Value() : 1U, + .pitch = 0, + .num_levels = 1, + .emulated_levels = 1, + .pixel_format = pixel_format, + .type = GetFormatType(pixel_format), + .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, + }; } -SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; +SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, + std::size_t index) { + const auto& config{maxwell3d.regs.rt[index]}; SurfaceParams params; params.is_tiled = config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || + config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; params.block_width = config.memory_layout.block_width; params.block_height = config.memory_layout.block_height; params.block_depth = config.memory_layout.block_depth; @@ -233,24 +233,29 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz SurfaceParams SurfaceParams::CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config) { - SurfaceParams params{}; - params.is_tiled = !config.linear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0, - params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, - params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.type = GetFormatType(params.pixel_format); - params.width = config.width; - params.height = config.height; - params.pitch = config.pitch; - // TODO(Rodrigo): Try to guess texture arrays from parameters - params.target = SurfaceTarget::Texture2D; - params.depth = 1; - params.num_levels = 1; - params.emulated_levels = 1; + const bool is_tiled = !config.linear; + const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); + + SurfaceParams params{ + .is_tiled = is_tiled, + .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || + config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, + .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, + .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, + .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, + .tile_width_spacing = 1, + .width = config.width, + .height = config.height, + .depth = 1, + .pitch = config.pitch, + .num_levels = 1, + .emulated_levels = 1, + .pixel_format = pixel_format, + .type = GetFormatType(pixel_format), + // TODO(Rodrigo): Try to guess texture arrays from parameters + .target = SurfaceTarget::Texture2D, + }; + params.is_layered = params.IsLayered(); return params; } @@ -343,8 +348,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } if (is_tiled && is_layered) { - return Common::AlignBits(size, - Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); + return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); } return size; } @@ -418,7 +422,7 @@ std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { const u32 block_size = GetBlockSize(); const u32 block_index = offset / block_size; const u32 gob_offset = offset % block_size; - const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize()); + const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); const u32 x_gob_pixels = 64U / GetBytesPerPixel(); const u32 x_block_pixels = x_gob_pixels << block_width; const u32 y_block_pixels = 8U << block_height; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 24957df8d..4466c3c34 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -33,10 +33,11 @@ public: const VideoCommon::Shader::Image& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer(Core::System& system); + static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); /// Creates SurfaceCachedParams from a framebuffer configuration. - static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); + static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, + std::size_t index); /// Creates SurfaceCachedParams from a Fermi2D surface configuration. static SurfaceParams CreateForFermiCopySurface( @@ -204,7 +205,7 @@ public: static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { return Common::AlignBits(out_size, - Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); + Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); } /// Converts a width from a type of surface into another. This helps represent the diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cdcddb225..ea835c59f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -135,8 +135,7 @@ public: return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } @@ -160,8 +159,7 @@ public: if (!gpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } @@ -183,11 +181,11 @@ public: TView GetDepthBufferSurface(bool preserve_contents) { std::lock_guard lock{mutex}; - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { + auto& dirty = maxwell3d.dirty; + if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { return depth_buffer.view; } - maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; + dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; const auto& regs{maxwell3d.regs}; const auto gpu_addr{regs.zeta.Address()}; @@ -195,13 +193,12 @@ public: SetEmptyDepthBuffer(); return {}; } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { SetEmptyDepthBuffer(); return {}; } - const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; + const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); @@ -215,7 +212,6 @@ public: TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { return render_targets[index].view; } @@ -235,15 +231,14 @@ public: return {}; } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { SetEmptyColorBuffer(index); return {}; } auto surface_view = - GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), preserve_contents, true); if (render_targets[index].target) { auto& surface = render_targets[index].target; @@ -300,9 +295,8 @@ public: const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const auto& memory_manager = system.GPU().MemoryManager(); - const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); - const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); + const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); + const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; ImageBlit(src_surface, dst_surface.second, copy_config); @@ -358,9 +352,11 @@ public: } protected: - explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - bool is_astc_supported) - : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} { + explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + bool is_astc_supported_) + : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + gpu_memory{gpu_memory_} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } @@ -373,9 +369,9 @@ protected: siblings_table[static_cast<std::size_t>(b)] = a; }; std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); - make_siblings(PixelFormat::Z16, PixelFormat::R16U); - make_siblings(PixelFormat::Z32F, PixelFormat::R32F); - make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); + make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); + make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); sampled_textures.reserve(64); } @@ -395,7 +391,7 @@ protected: virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void ManageRenderTargetUnregister(TSurface& surface) { - auto& dirty = system.GPU().Maxwell3D().dirty; + auto& dirty = maxwell3d.dirty; const u32 index = surface->GetRenderTarget(); if (index == DEPTH_RT) { dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; @@ -408,8 +404,7 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); const std::size_t size = surface->GetSizeInBytes(); - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); @@ -459,7 +454,6 @@ protected: return new_surface; } - Core::System& system; const bool is_astc_supported; private: @@ -954,8 +948,7 @@ private: * @param params The parameters on the candidate surface. **/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { Deduction result{}; @@ -1031,7 +1024,7 @@ private: params.pitch = 4; params.num_levels = 1; params.emulated_levels = 1; - params.pixel_format = VideoCore::Surface::PixelFormat::R8U; + params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; params.type = VideoCore::Surface::SurfaceType::ColorTexture; auto surface = CreateSurface(0ULL, params); invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); @@ -1112,7 +1105,7 @@ private: void LoadSurface(const TSurface& surface) { staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); + surface->LoadBuffer(gpu_memory, staging_cache); surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -1123,7 +1116,7 @@ private: } staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); + surface->FlushBuffer(gpu_memory, staging_cache); surface->MarkAsModified(false, Tick()); } @@ -1253,6 +1246,8 @@ private: } VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; FormatLookupTable format_lookup_table; FormatCompatibility format_compatibility; diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index f3efa7eb0..962921483 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp @@ -35,7 +35,7 @@ void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { S8Z24 s8z24_pixel{}; Z24S8 z24s8_pixel{}; constexpr auto bpp{ - VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)}; + VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; for (std::size_t y = 0; y < height; ++y) { for (std::size_t x = 0; x < width; ++x) { const std::size_t offset{bpp * (y * width + x)}; @@ -73,7 +73,7 @@ void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, in_data, width, height, depth, block_width, block_height); std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); - } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { + } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); } } @@ -85,7 +85,7 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h static_cast<u32>(pixel_format)); UNREACHABLE(); - } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { + } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); } } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 548e4c3fe..16d46a018 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -6,6 +6,7 @@ #include <cstring> #include "common/alignment.h" #include "common/assert.h" +#include "common/bit_util.h" #include "video_core/gpu.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" @@ -37,20 +38,10 @@ struct alignas(64) SwizzleTable { std::array<std::array<u16, M>, N> values{}; }; -constexpr u32 gob_size_x_shift = 6; -constexpr u32 gob_size_y_shift = 3; -constexpr u32 gob_size_z_shift = 0; -constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; +constexpr u32 FAST_SWIZZLE_ALIGN = 16; -constexpr u32 gob_size_x = 1U << gob_size_x_shift; -constexpr u32 gob_size_y = 1U << gob_size_y_shift; -constexpr u32 gob_size_z = 1U << gob_size_z_shift; -constexpr u32 gob_size = 1U << gob_size_shift; - -constexpr u32 fast_swizzle_align = 16; - -constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); -constexpr auto fast_swizzle_table = SwizzleTable<gob_size_y, 4, fast_swizzle_align>(); +constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); +constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); /** * This function manages ALL the GOBs(Group of Bytes) Inside a single block. @@ -69,17 +60,17 @@ void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, con u32 y_address = z_address; u32 pixel_base = layer_z * z + y_start * stride_x; for (u32 y = y_start; y < y_end; y++) { - const auto& table = legacy_swizzle_table[y % gob_size_y]; + const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; for (u32 x = x_start; x < x_end; x++) { - const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]}; + const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; data_ptrs[unswizzle] = swizzled_data + swizzle_offset; data_ptrs[!unswizzle] = unswizzled_data + pixel_index; std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); } pixel_base += stride_x; - if ((y + 1) % gob_size_y == 0) - y_address += gob_size; + if ((y + 1) % GOB_SIZE_Y == 0) + y_address += GOB_SIZE; } z_address += xy_block_size; } @@ -104,18 +95,18 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const u32 y_address = z_address; u32 pixel_base = layer_z * z + y_start * stride_x; for (u32 y = y_start; y < y_end; y++) { - const auto& table = fast_swizzle_table[y % gob_size_y]; - for (u32 xb = x_startb; xb < x_endb; xb += fast_swizzle_align) { - const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; + const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; + for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { + const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; const u32 pixel_index{out_x + pixel_base}; data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); + std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); } pixel_base += stride_x; - if ((y + 1) % gob_size_y == 0) - y_address += gob_size; + if ((y + 1) % GOB_SIZE_Y == 0) + y_address += GOB_SIZE; } z_address += xy_block_size; } @@ -138,9 +129,9 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; const u32 stride_x = width * out_bytes_per_pixel; const u32 layer_z = height * stride_x; - const u32 gob_elements_x = gob_size_x / bytes_per_pixel; - constexpr u32 gob_elements_y = gob_size_y; - constexpr u32 gob_elements_z = gob_size_z; + const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; + constexpr u32 gob_elements_y = GOB_SIZE_Y; + constexpr u32 gob_elements_z = GOB_SIZE_Z; const u32 block_x_elements = gob_elements_x; const u32 block_y_elements = gob_elements_y * block_height; const u32 block_z_elements = gob_elements_z * block_depth; @@ -148,7 +139,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); const u32 blocks_on_y = div_ceil(height, block_y_elements); const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 xy_block_size = gob_size * block_height; + const u32 xy_block_size = GOB_SIZE * block_height; const u32 block_size = xy_block_size * block_depth; u32 tile_offset = 0; for (u32 zb = 0; zb < blocks_on_z; zb++) { @@ -182,7 +173,7 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { const u32 block_height_size{1U << block_height}; const u32 block_depth_size{1U << block_depth}; - if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { + if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, bytes_per_pixel, out_bytes_per_pixel, block_height_size, block_depth_size, width_spacing); @@ -193,53 +184,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, } } -u32 BytesPerPixel(TextureFormat format) { - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXN1: - // In this case a 'pixel' actually refers to a 4x4 tile. - return 8; - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN2: - case TextureFormat::BC7U: - case TextureFormat::BC6H_UF16: - case TextureFormat::BC6H_SF16: - // In this case a 'pixel' actually refers to a 4x4 tile. - return 16; - case TextureFormat::R32_G32_B32: - return 12; - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::ASTC_2D_5X4: - case TextureFormat::ASTC_2D_8X8: - case TextureFormat::ASTC_2D_8X5: - case TextureFormat::ASTC_2D_10X8: - case TextureFormat::ASTC_2D_5X5: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::BF10GF11RF11: - case TextureFormat::R32: - case TextureFormat::R16_G16: - return 4; - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::G8R8: - case TextureFormat::R16: - return 2; - case TextureFormat::R8: - return 1; - case TextureFormat::R16_G16_B16_A16: - return 8; - case TextureFormat::R32_G32_B32_A32: - return 16; - case TextureFormat::R32_G32: - return 8; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - return 1; - } -} - void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 width_spacing) { @@ -259,47 +203,82 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, u32 offset_x, u32 offset_y) { const u32 block_height = 1U << block_height_bit; - const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / - gob_size_x}; + const u32 image_width_in_gobs = + (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X; for (u32 line = 0; line < subrect_height; ++line) { const u32 dst_y = line + offset_y; const u32 gob_address_y = - (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size; - const auto& table = legacy_swizzle_table[dst_y % gob_size_y]; + (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + + ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; + const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 dst_x = x + offset_x; const u32 gob_address = - gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height; - const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x]; - u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; - u8* dest_addr = swizzled_data + swizzled_offset; + gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height; + const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X]; + const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel; + const u8* const source_line = unswizzled_data + unswizzled_offset; + u8* const dest_addr = swizzled_data + swizzled_offset; std::memcpy(dest_addr, source_line, bytes_per_pixel); } } } -void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, - u32 block_height_bit, u32 offset_x, u32 offset_y) { - const u32 block_height = 1U << block_height_bit; - for (u32 line = 0; line < subrect_height; ++line) { - const u32 y2 = line + offset_y; - const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + - ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; - const auto& table = legacy_swizzle_table[y2 % gob_size_y]; - for (u32 x = 0; x < subrect_width; ++x) { - const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; - const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; - u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; - u8* source_addr = swizzled_data + swizzled_offset; +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, + u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { + const u32 stride = width * bytes_per_pixel; + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); + + const u32 block_height_mask = (1U << block_height) - 1; + const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; + + for (u32 line = 0; line < line_count; ++line) { + const u32 src_y = line + origin_y; + const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; + + const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; + const u32 src_offset_y = (block_y >> block_height) * block_size + + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); + for (u32 column = 0; column < line_length_in; ++column) { + const u32 src_x = (column + origin_x) * bytes_per_pixel; + const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; + + const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X]; + const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel; - std::memcpy(dest_line, source_addr, bytes_per_pixel); + std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel); + } + } +} + +void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, + u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, + u32 origin_y, u8* output, const u8* input) { + UNIMPLEMENTED_IF(origin_x > 0); + UNIMPLEMENTED_IF(origin_y > 0); + + const u32 stride = width * bytes_per_pixel; + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); + + const u32 block_height_mask = (1U << block_height) - 1; + const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; + + for (u32 line = 0; line < line_count; ++line) { + const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; + const u32 block_y = line / GOB_SIZE_Y; + const u32 dst_offset_y = + (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; + for (u32 x = 0; x < line_length_in; ++x) { + const u32 dst_offset = + ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; + const u32 src_offset = x * bytes_per_pixel + line * pitch; + std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel); } } } @@ -308,17 +287,17 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, u8* swizzle_data) { const u32 block_height = 1U << block_height_bit; - const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; + const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X}; std::size_t count = 0; for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { const std::size_t gob_address_y = - (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; - const auto& table = legacy_swizzle_table[y % gob_size_y]; + (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + + ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; + const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = - gob_address_y + (x / gob_size_x) * gob_size * block_height; - const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; + gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; + const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X]; const u8* source_line = source_data + count; u8* dest_addr = swizzle_data + swizzled_offset; count++; @@ -328,54 +307,12 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 } } -std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, - u32 height) { - std::vector<u8> rgba_data; - - // TODO(Subv): Implement. - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN1: - case TextureFormat::DXN2: - case TextureFormat::BC7U: - case TextureFormat::BC6H_UF16: - case TextureFormat::BC6H_SF16: - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::ASTC_2D_8X8: - case TextureFormat::ASTC_2D_5X5: - case TextureFormat::ASTC_2D_10X8: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::R8: - case TextureFormat::G8R8: - case TextureFormat::BF10GF11RF11: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R16_G16: - case TextureFormat::R32_G32_B32: - // TODO(Subv): For the time being just forward the same data without any decoding. - rgba_data = texture_data; - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - - return rgba_data; -} - std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); - const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); - const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); + const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT); + const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height); + const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth); return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; @@ -386,14 +323,14 @@ u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, u32 bytes_per_pixel) { auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; const u32 gobs_in_block = 1 << block_height; - const u32 y_blocks = gob_size_y << block_height; - const u32 x_per_gob = gob_size_x / bytes_per_pixel; + const u32 y_blocks = GOB_SIZE_Y << block_height; + const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel; const u32 x_blocks = div_ceil(width, x_per_gob); - const u32 block_size = gob_size * gobs_in_block; + const u32 block_size = GOB_SIZE * gobs_in_block; const u32 stride = block_size * x_blocks; const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; const u32 relative_y = dst_y % y_blocks; - return base + (relative_y / gob_size_y) * gob_size; + return base + (relative_y / GOB_SIZE_Y) * GOB_SIZE; } } // namespace Tegra::Texture diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 06f3ebf87..01e156bc8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -10,15 +10,15 @@ namespace Tegra::Texture { -// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents -// an small rect of (64/bytes_per_pixel)X8. -inline std::size_t GetGOBSize() { - return 512; -} +constexpr u32 GOB_SIZE_X = 64; +constexpr u32 GOB_SIZE_Y = 8; +constexpr u32 GOB_SIZE_Z = 1; +constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; -inline std::size_t GetGOBSizeShift() { - return 9; -} +constexpr std::size_t GOB_SIZE_X_SHIFT = 6; +constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; +constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; +constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; /// Unswizzles a swizzled texture without changing its format. void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, @@ -38,23 +38,36 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); -/// Decodes an unswizzled texture into a A8R8G8B8 texture. -std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, - u32 height); - /// This function calculates the correct size of a texture depending if it's tiled or not. std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth); /// Copies an untiled subrectangle into a tiled surface. void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, - u32 offset_x, u32 offset_y); + u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, + u32 block_height_bit, u32 offset_x, u32 offset_y); /// Copies a tiled subrectangle into a linear surface. -void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, - u32 offset_x, u32 offset_y); +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, + u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input); + +/// @brief Swizzles a 2D array of pixels into a 3D texture +/// @param line_length_in Number of pixels per line +/// @param line_count Number of lines +/// @param pitch Number of bytes per line +/// @param width Width of the swizzled texture +/// @param height Height of the swizzled texture +/// @param bytes_per_pixel Number of bytes used per pixel +/// @param block_height Block height shift +/// @param block_depth Block depth shift +/// @param origin_x Column offset in pixels of the swizzled texture +/// @param origin_y Row offset in pixels of the swizzled texture +/// @param output Pointer to the pixels of the swizzled texture +/// @param input Pointer to the 2D array of pixels used as input +/// @pre input and output points to an array large enough to hold the number of bytes used +void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, + u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, + u32 origin_y, u8* output, const u8* input); void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, std::size_t copy_size, const u8* source_data, u8* swizzle_data); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index eba05aced..0574fef12 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -12,10 +12,10 @@ namespace Tegra::Texture { enum class TextureFormat : u32 { - R32_G32_B32_A32 = 0x01, - R32_G32_B32 = 0x02, - R16_G16_B16_A16 = 0x03, - R32_G32 = 0x04, + R32G32B32A32 = 0x01, + R32G32B32 = 0x02, + R16G16B16A16 = 0x03, + R32G32 = 0x04, R32_B24G8 = 0x05, ETC2_RGB = 0x06, X8B8G8R8 = 0x07, @@ -23,19 +23,19 @@ enum class TextureFormat : u32 { A2B10G10R10 = 0x09, ETC2_RGB_PTA = 0x0a, ETC2_RGBA = 0x0b, - R16_G16 = 0x0c, - G8R24 = 0x0d, - G24R8 = 0x0e, + R16G16 = 0x0c, + R24G8 = 0x0d, + R8G24 = 0x0e, R32 = 0x0f, - BC6H_SF16 = 0x10, - BC6H_UF16 = 0x11, + BC6H_SFLOAT = 0x10, + BC6H_UFLOAT = 0x11, A4B4G4R4 = 0x12, A5B5G5R1 = 0x13, A1B5G5R5 = 0x14, B5G6R5 = 0x15, B6G5R5 = 0x16, - BC7U = 0x17, - G8R8 = 0x18, + BC7 = 0x17, + R8G8 = 0x18, EAC = 0x19, EACX2 = 0x1a, R16 = 0x1b, @@ -43,23 +43,23 @@ enum class TextureFormat : u32 { R8 = 0x1d, G4R4 = 0x1e, R1 = 0x1f, - E5B9G9R9_SHAREDEXP = 0x20, - BF10GF11RF11 = 0x21, + E5B9G9R9 = 0x20, + B10G11R11 = 0x21, G8B8G8R8 = 0x22, B8G8R8G8 = 0x23, - DXT1 = 0x24, - DXT23 = 0x25, - DXT45 = 0x26, - DXN1 = 0x27, - DXN2 = 0x28, - S8Z24 = 0x29, + BC1_RGBA = 0x24, + BC2 = 0x25, + BC3 = 0x26, + BC4 = 0x27, + BC5 = 0x28, + S8D24 = 0x29, X8Z24 = 0x2a, - Z24S8 = 0x2b, + D24S8 = 0x2b, X4V4Z24__COV4R4V = 0x2c, X4V4Z24__COV8R8V = 0x2d, V8Z24__COV4R12V = 0x2e, - ZF32 = 0x2f, - ZF32_X24S8 = 0x30, + D32 = 0x2f, + D32S8 = 0x30, X8Z24_X20V4S8__COV4R4V = 0x31, X8Z24_X20V4S8__COV8R8V = 0x32, ZF32_X20V4X8__COV4R4V = 0x33, @@ -69,7 +69,7 @@ enum class TextureFormat : u32 { X8Z24_X16V8S8__COV4R12V = 0x37, ZF32_X16V8X8__COV4R12V = 0x38, ZF32_X16V8S8__COV4R12V = 0x39, - Z16 = 0x3a, + D16 = 0x3a, V8Z24__COV8R24V = 0x3b, X8Z24_X16V8S8__COV8R24V = 0x3c, ZF32_X16V8X8__COV8R24V = 0x3d, @@ -375,7 +375,4 @@ struct FullTextureInfo { TSCEntry tsc; }; -/// Returns the number of bytes per pixel of the input texture format. -u32 BytesPerPixel(TextureFormat format); - } // namespace Tegra::Texture diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 45f360bdd..a14df06a3 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <memory> + #include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" @@ -16,37 +17,49 @@ #include "video_core/video_core.h" namespace { -std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system, - Core::Frontend::GraphicsContext& context) { + +std::unique_ptr<VideoCore::RendererBase> CreateRenderer( + Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context) { + auto& telemetry_session = system.TelemetrySession(); + auto& cpu_memory = system.Memory(); + switch (Settings::values.renderer_backend.GetValue()) { case Settings::RendererBackend::OpenGL: - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context); + return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, + gpu, std::move(context)); #ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: - return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); + return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, + gpu, std::move(context)); #endif default: return nullptr; } } + } // Anonymous namespace namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + std::unique_ptr<Tegra::GPU> gpu; + if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { + gpu = std::make_unique<VideoCommon::GPUAsynch>(system); + } else { + gpu = std::make_unique<VideoCommon::GPUSynch>(system); + } + auto context = emu_window.CreateSharedContext(); const auto scope = context->Acquire(); - auto renderer = CreateRenderer(emu_window, system, *context); + + auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); if (!renderer->Init()) { return nullptr; } - if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer), - std::move(context)); - } - return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context)); + gpu->BindRenderer(std::move(renderer)); + return gpu; } u16 GetResolutionScaleFactor(const RendererBase& renderer) { |