diff options
Diffstat (limited to 'src/video_core')
21 files changed, 248 insertions, 143 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2f6cdd216..269db21a5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -231,6 +231,7 @@ endif() target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) +target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f798a0053..61966cbfe 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -5,6 +5,7 @@ #include <fstream> #include <vector> #include "common/assert.h" +#include "common/settings.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" #include "video_core/command_classes/codecs/vp9.h" @@ -16,108 +17,146 @@ extern "C" { } namespace Tegra { -#if defined(LIBVA_FOUND) -// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license namespace { -constexpr std::array<const char*, 2> VAAPI_DRIVERS = { - "i915", - "amdgpu", -}; +constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; +constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; + +void AVPacketDeleter(AVPacket* ptr) { + av_packet_free(&ptr); +} -AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) { +using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>; + +AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { - if (*p == AV_PIX_FMT_VAAPI) { - return AV_PIX_FMT_VAAPI; + if (*p == av_codec_ctx->pix_fmt) { + return av_codec_ctx->pix_fmt; } } LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); - return *pix_fmts; + av_buffer_unref(&av_codec_ctx->hw_device_ctx); + av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT; + return PREFERRED_CPU_FMT; +} +} // namespace + +void AVFrameDeleter(AVFrame* ptr) { + av_frame_free(&ptr); } -bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) { +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), + vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} + +Codec::~Codec() { + if (!initialized) { + return; + } + // Free libav memory + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); +} + +bool Codec::CreateGpuAvDevice() { +#if defined(LIBVA_FOUND) + static constexpr std::array<const char*, 3> VAAPI_DRIVERS = { + "i915", + "iHD", + "amdgpu", + }; AVDictionary* hwdevice_options = nullptr; av_dict_set(&hwdevice_options, "connection_type", "drm", 0); for (const auto& driver : VAAPI_DRIVERS) { av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); - const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI, + const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, nullptr, hwdevice_options, 0); if (hwdevice_error >= 0) { LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); av_dict_free(&hwdevice_options); + av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; return true; } LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); } LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); av_dict_free(&hwdevice_options); - return false; -} -} // namespace #endif - -void AVFrameDeleter(AVFrame* ptr) { - av_frame_free(&ptr); + static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; + static constexpr std::array GPU_DECODER_TYPES{ + AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, +#else + AV_HWDEVICE_TYPE_VDPAU, +#endif + }; + for (const auto& type : GPU_DECODER_TYPES) { + const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); + if (hwdevice_res < 0) { + LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", + av_hwdevice_get_type_name(type), hwdevice_res); + continue; + } + for (int i = 0;; i++) { + const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); + if (!config) { + LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.", + av_codec->name, av_hwdevice_get_type_name(type)); + break; + } + if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { + av_codec_ctx->pix_fmt = config->pix_fmt; + LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + return true; + } + } + } + return false; } -Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) - : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), - vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} - -Codec::~Codec() { - if (!initialized) { - return; - } - // Free libav memory - avcodec_send_packet(av_codec_ctx, nullptr); - AVFrame* av_frame = av_frame_alloc(); - avcodec_receive_frame(av_codec_ctx, av_frame); - avcodec_flush_buffers(av_codec_ctx); - av_frame_free(&av_frame); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); +void Codec::InitializeAvCodecContext() { + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); } -void Codec::InitializeHwdec() { - // Prioritize integrated GPU to mitigate bandwidth bottlenecks -#if defined(LIBVA_FOUND) - if (CreateVaapiHwdevice(&av_hw_device)) { - const auto hw_device_ctx = av_buffer_ref(av_hw_device); - ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); - av_codec_ctx->hw_device_ctx = hw_device_ctx; - av_codec_ctx->get_format = GetHwFormat; +void Codec::InitializeGpuDecoder() { + if (!CreateGpuAvDevice()) { + av_buffer_unref(&av_gpu_decoder); return; } -#endif - // TODO more GPU accelerated decoders + auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder); + ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); + av_codec_ctx->hw_device_ctx = hw_device_ctx; + av_codec_ctx->get_format = GetGpuFormat; } void Codec::Initialize() { - AVCodecID codec; - switch (current_codec) { - case NvdecCommon::VideoCodec::H264: - codec = AV_CODEC_ID_H264; - break; - case NvdecCommon::VideoCodec::Vp9: - codec = AV_CODEC_ID_VP9; - break; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + const AVCodecID codec = [&] { + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case NvdecCommon::VideoCodec::Vp9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + return AV_CODEC_ID_NONE; + } + }(); + av_codec = avcodec_find_decoder(codec); + + InitializeAvCodecContext(); + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) { + InitializeGpuDecoder(); + } + if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); return; } - av_codec = avcodec_find_decoder(codec); - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - InitializeHwdec(); if (!av_codec_ctx->hw_device_ctx) { LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); } - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); - return; - } initialized = true; } @@ -133,6 +172,9 @@ void Codec::Decode() { if (is_first_frame) { Initialize(); } + if (!initialized) { + return; + } bool vp9_hidden_frame = false; std::vector<u8> frame_data; if (current_codec == NvdecCommon::VideoCodec::H264) { @@ -141,50 +183,48 @@ void Codec::Decode() { frame_data = vp9_decoder->ComposeFrameHeader(state); vp9_hidden_frame = vp9_decoder->WasFrameHidden(); } - AVPacket packet{}; - av_init_packet(&packet); - packet.data = frame_data.data(); - packet.size = static_cast<s32>(frame_data.size()); - if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret); + AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; + if (!packet) { + LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); + return; + } + packet->data = frame_data.data(); + packet->size = static_cast<s32>(frame_data.size()); + if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { + LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); return; } // Only receive/store visible frames if (vp9_hidden_frame) { return; } - AVFrame* hw_frame = av_frame_alloc(); - AVFrame* sw_frame = hw_frame; - ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed"); - if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) { + AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; + AVFramePtr final_frame{nullptr, AVFrameDeleter}; + ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); + if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); - av_frame_free(&hw_frame); return; } - if (!hw_frame->width || !hw_frame->height) { + if (initial_frame->width == 0 || initial_frame->height == 0) { LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); - av_frame_free(&hw_frame); return; } -#if defined(LIBVA_FOUND) - // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license - if (hw_frame->format == AV_PIX_FMT_VAAPI) { - sw_frame = av_frame_alloc(); - ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed"); + if (av_codec_ctx->hw_device_ctx) { + final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp // because Intel drivers crash unless using AV_PIX_FMT_NV12 - sw_frame->format = AV_PIX_FMT_NV12; - const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0); - ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret); - av_frame_free(&hw_frame); + final_frame->format = PREFERRED_GPU_FMT; + const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); + ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); + } else { + final_frame = std::move(initial_frame); } -#endif - if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) { - UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format); - av_frame_free(&sw_frame); + if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) { + UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter}); + av_frames.push(std::move(final_frame)); if (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 71936203f..f9a80886f 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -5,6 +5,7 @@ #pragma once #include <memory> +#include <string_view> #include <queue> #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -50,18 +51,23 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec [[nodiscard]] std::string_view GetCurrentCodecName() const; private: - void InitializeHwdec(); + void InitializeAvCodecContext(); + + void InitializeGpuDecoder(); + + bool CreateGpuAvDevice(); bool initialized{}; NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; AVCodec* av_codec{nullptr}; - AVBufferRef* av_hw_device{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; + AVBufferRef* av_gpu_decoder{nullptr}; GPU& gpu; const NvdecCommon::NvdecRegisters& state; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 5fb6d45ee..51ee14c13 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); - writer.WriteUe(16); + // TODO (ameerj): Where do we get this number, it seems to be particular for each stream + writer.WriteUe(6); // Max number of reference frames writer.WriteBit(false); writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); writer.WriteUe(pic_height - 1); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1aa43523a..7f4ca6282 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -475,10 +475,10 @@ public: // These values are used by Nouveau and some games. AddGL = 0x8006, - SubtractGL = 0x8007, - ReverseSubtractGL = 0x8008, - MinGL = 0x800a, - MaxGL = 0x800b + MinGL = 0x8007, + MaxGL = 0x8008, + SubtractGL = 0x800a, + ReverseSubtractGL = 0x800b }; enum class Factor : u32 { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index c60ed6453..dce00e829 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> + #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c9b0d6db..9ff0a28cd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { blit_screen.Recreate(); } const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); - scheduler.Flush(render_semaphore); + const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); + scheduler.Flush(render_semaphore, present_semaphore); scheduler.WaitWorker(); swapchain.Present(render_semaphore); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index cb0580182..888bc7392 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -358,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() { void VKBlitScreen::CreateRenderPass() { const VkAttachmentDescription color_attachment{ .flags = 0, - .format = swapchain.GetImageFormat(), + .format = swapchain.GetImageViewFormat(), .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 8e77e4796..adb557f60 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <mutex> #include <span> #include <vector> diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3ac18ea54..841a6b846 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() { }; const u32 color_attachment = regs.clear_buffers.RT; - const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask; - const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; - if (use_color && is_color_rt) { + if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { VkClearValue clear_value; std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); @@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() { return; } VkImageAspectFlags aspect_flags = 0; - if (use_depth) { + if (use_depth && framebuffer->HasAspectDepthBit()) { aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT; } - if (use_stencil) { + if (use_stencil && framebuffer->HasAspectStencilBit()) { aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; } + if (aspect_flags == 0) { + return; + } scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { VkClearAttachment attachment; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 4840962de..1d438787a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(VkSemaphore semaphore) { - SubmitExecution(semaphore); +void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + SubmitExecution(signal_semaphore, wait_semaphore); AllocateNewContext(); } -void VKScheduler::Finish(VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { const u64 presubmit_tick = CurrentTick(); - SubmitExecution(semaphore); + SubmitExecution(signal_semaphore, wait_semaphore); WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); @@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() { }); } -void VKScheduler::SubmitExecution(VkSemaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { EndPendingOperations(); InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); - Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); - - const u32 num_signal_semaphores = semaphore ? 2U : 1U; - - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + + const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + }; const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), .signalSemaphoreValueCount = num_signal_semaphores, .pSignalSemaphoreValues = signal_values.data(), }; const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1, .pCommandBuffers = cmdbuf.address(), .signalSemaphoreCount = num_signal_semaphores, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cf39a2363..759ed5a48 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -34,10 +34,10 @@ public: ~VKScheduler(); /// Sends the current execution context to the GPU. - void Flush(VkSemaphore semaphore = nullptr); + void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(VkSemaphore semaphore = nullptr); + void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -191,7 +191,7 @@ private: void AllocateWorkerCommandBuffer(); - void SubmitExecution(VkSemaphore semaphore); + void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index d990eefba..aadf03cb0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -20,16 +20,15 @@ namespace Vulkan { namespace { -VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) { +VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) { if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) { VkSurfaceFormatKHR format; format.format = VK_FORMAT_B8G8R8A8_UNORM; format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; return format; } - const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { - const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; - return format.format == request_format && + const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) { + return format.format == VK_FORMAT_B8G8R8A8_UNORM && format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; }); return found != formats.end() ? *found : formats[0]; @@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() { } void VKSwapchain::Present(VkSemaphore render_semaphore) { - const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; - const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreCount = render_semaphore ? 2U : 1U, - .pWaitSemaphores = semaphores.data(), + .waitSemaphoreCount = render_semaphore ? 1U : 0U, + .pWaitSemaphores = &render_semaphore, .swapchainCount = 1, .pSwapchains = swapchain.address(), .pImageIndices = &image_index, @@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; - const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; + const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; u32 requested_image_count{capabilities.minImageCount + 1}; @@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } + static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB}; + VkImageFormatListCreateInfo format_list{ + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR, + .pNext = nullptr, + .viewFormatCount = static_cast<u32>(view_formats.size()), + .pViewFormats = view_formats.data(), + }; + if (device.IsKhrSwapchainMutableFormatEnabled()) { + format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list); + swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR; + } // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); - image_format = surface_format.format; + image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; } void VKSwapchain::CreateSemaphores() { @@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() { .flags = 0, .image = {}, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image_format, + .format = image_view_format, .components = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 35c2cdc14..5bce41e21 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -68,8 +68,12 @@ public: return *image_views[index]; } - VkFormat GetImageFormat() const { - return image_format; + VkFormat GetImageViewFormat() const { + return image_view_format; + } + + VkSemaphore CurrentPresentSemaphore() const { + return *present_semaphores[frame_index]; } private: @@ -96,7 +100,7 @@ private: u32 image_index{}; u32 frame_index{}; - VkFormat image_format{}; + VkFormat image_view_format{}; VkExtent2D extent{}; bool current_srgb{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8f4df7122..ff979a7ac 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1186,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); images[num_images] = depth_buffer->ImageHandle(); - image_ranges[num_images] = MakeSubresourceRange(depth_buffer); + const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer); + image_ranges[num_images] = subresource_range; samples = depth_buffer->Samples(); ++num_images; + has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0; + has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0; } else { renderpass_key.depth_format = PixelFormat::Invalid; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 5fe6b7ba3..6d5a68bfe 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -232,6 +232,18 @@ public: return image_ranges; } + [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { + return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + } + + [[nodiscard]] bool HasAspectDepthBit() const noexcept { + return has_depth; + } + + [[nodiscard]] bool HasAspectStencilBit() const noexcept { + return has_stencil; + } + private: vk::Framebuffer framebuffer; VkRenderPass renderpass{}; @@ -241,6 +253,8 @@ private: u32 num_images = 0; std::array<VkImage, 9> images{}; std::array<VkImageSubresourceRange, 9> image_ranges{}; + bool has_depth{}; + bool has_stencil{}; }; struct TextureCacheParams { diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 8a4581c19..81a878bb2 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <filesystem> #include <fstream> #include <memory> diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 6180b8c0e..74cd3c9d8 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -4,6 +4,7 @@ #pragma once +#include <algorithm> #include <array> #include <bit> #include <concepts> diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 3b575db4d..cae543a51 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); + const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); + const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); auto context = emu_window.CreateSharedContext(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 86ca4be54..24821c1a3 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -839,6 +839,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { bool has_khr_shader_float16_int8{}; bool has_khr_workgroup_memory_explicit_layout{}; bool has_khr_pipeline_executable_properties{}; + bool has_khr_image_format_list{}; + bool has_khr_swapchain_mutable_format{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; @@ -888,6 +890,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); + test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, + false); test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, @@ -1066,6 +1071,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { khr_pipeline_executable_properties = true; } } + if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { + extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); + extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); + khr_swapchain_mutable_format = true; + } if (khr_push_descriptor) { VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 234d74129..5599c38c5 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -224,6 +224,11 @@ public: return khr_pipeline_executable_properties; } + /// Returns true if VK_KHR_swapchain_mutable_format is enabled. + bool IsKhrSwapchainMutableFormatEnabled() const { + return khr_swapchain_mutable_format; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return khr_workgroup_memory_explicit_layout; @@ -390,6 +395,7 @@ private: bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. bool khr_pipeline_executable_properties{}; ///< Support for executable properties. + bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |