From 668e80a9f42fb4ce0e16f6381d05bcbd286b2da1 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 30 Jan 2022 10:31:13 +0100 Subject: VideoCore: Refactor syncing. --- src/video_core/gpu.cpp | 197 +++++++++++++++++++++++++------------------------ 1 file changed, 102 insertions(+), 95 deletions(-) (limited to 'src/video_core/gpu.cpp') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index eebd7f3ff..1097db08a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -28,6 +28,8 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" +#include "video_core/host1x/host1x.h" +#include "video_core/host1x/syncpoint_manager.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" #include "video_core/shader_notify.h" @@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); struct GPU::Impl { explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) - : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_}, + : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, shader_notify{std::make_unique()}, is_async{is_async_}, gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {} @@ -115,31 +117,35 @@ struct GPU::Impl { } /// Request a host GPU memory flush from the CPU. - [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) { - std::unique_lock lck{flush_request_mutex}; - const u64 fence = ++last_flush_fence; - flush_requests.emplace_back(fence, addr, size); + template + [[nodiscard]] u64 RequestSyncOperation(Func&& action) { + std::unique_lock lck{sync_request_mutex}; + const u64 fence = ++last_sync_fence; + sync_requests.emplace_back(action); return fence; } /// Obtains current flush request fence id. - [[nodiscard]] u64 CurrentFlushRequestFence() const { - return current_flush_fence.load(std::memory_order_relaxed); + [[nodiscard]] u64 CurrentSyncRequestFence() const { + return current_sync_fence.load(std::memory_order_relaxed); + } + + void WaitForSyncOperation(const u64 fence) { + std::unique_lock lck{sync_request_mutex}; + sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; }); } /// Tick pending requests within the GPU. void TickWork() { - std::unique_lock lck{flush_request_mutex}; - while (!flush_requests.empty()) { - auto& request = flush_requests.front(); - const u64 fence = request.fence; - const VAddr addr = request.addr; - const std::size_t size = request.size; - flush_requests.pop_front(); - flush_request_mutex.unlock(); - rasterizer->FlushRegion(addr, size); - current_flush_fence.store(fence); - flush_request_mutex.lock(); + std::unique_lock lck{sync_request_mutex}; + while (!sync_requests.empty()) { + auto request = std::move(sync_requests.front()); + sync_requests.pop_front(); + sync_request_mutex.unlock(); + request(); + current_sync_fence.fetch_add(1, std::memory_order_release); + sync_request_mutex.lock(); + sync_request_cv.notify_all(); } } @@ -207,78 +213,26 @@ struct GPU::Impl { /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value) { - // Synced GPU, is always in sync - if (!is_async) { - return; - } if (syncpoint_id == UINT32_MAX) { - // TODO: Research what this does. - LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); return; } MICROPROFILE_SCOPE(GPU_wait); - std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=, this] { - if (shutting_down.load(std::memory_order_relaxed)) { - // We're shutting down, ensure no threads continue to wait for the next syncpoint - return true; - } - return syncpoints.at(syncpoint_id).load() >= value; - }); + host1x.GetSyncpointManager().WaitHost(syncpoint_id, value); } void IncrementSyncPoint(u32 syncpoint_id) { - auto& syncpoint = syncpoints.at(syncpoint_id); - syncpoint++; - std::scoped_lock lock{sync_mutex}; - sync_cv.notify_all(); - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - if (!interrupt.empty()) { - u32 value = syncpoint.load(); - auto it = interrupt.begin(); - while (it != interrupt.end()) { - if (value >= *it) { - TriggerCpuInterrupt(syncpoint_id, *it); - it = interrupt.erase(it); - continue; - } - it++; - } - } + host1x.GetSyncpointManager().IncrementHost(syncpoint_id); } [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { - return syncpoints.at(syncpoint_id).load(); + return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id); } void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { - std::scoped_lock lock{sync_mutex}; - u32 current_value = syncpoints.at(syncpoint_id).load(); - if ((static_cast(current_value) - static_cast(value)) >= 0) { + auto& syncpoint_manager = host1x.GetSyncpointManager(); + syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() { TriggerCpuInterrupt(syncpoint_id, value); - return; - } - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - bool contains = std::any_of(interrupt.begin(), interrupt.end(), - [value](u32 in_value) { return in_value == value; }); - if (contains) { - return; - } - interrupt.emplace_back(value); - } - - [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { - std::scoped_lock lock{sync_mutex}; - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - const auto iter = - std::find_if(interrupt.begin(), interrupt.end(), - [value](u32 interrupt_value) { return value == interrupt_value; }); - - if (iter == interrupt.end()) { - return false; - } - interrupt.erase(iter); - return true; + }); } [[nodiscard]] u64 GetTicks() const { @@ -387,8 +341,48 @@ struct GPU::Impl { interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } + void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, + Service::Nvidia::NvFence* fences, size_t num_fences) { + size_t current_request_counter{}; + { + std::unique_lock lk(request_swap_mutex); + if (free_swap_counters.empty()) { + current_request_counter = request_swap_counters.size(); + request_swap_counters.emplace_back(num_fences); + } else { + current_request_counter = free_swap_counters.front(); + request_swap_counters[current_request_counter] = num_fences; + free_swap_counters.pop_front(); + } + } + const auto wait_fence = + RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { + auto& syncpoint_manager = host1x.GetSyncpointManager(); + if (num_fences == 0) { + renderer->SwapBuffers(framebuffer); + } + const auto executer = [this, current_request_counter, + framebuffer_copy = *framebuffer]() { + { + std::unique_lock lk(request_swap_mutex); + if (--request_swap_counters[current_request_counter] != 0) { + return; + } + free_swap_counters.push_back(current_request_counter); + } + renderer->SwapBuffers(&framebuffer_copy); + }; + for (size_t i = 0; i < num_fences; i++) { + syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); + } + }); + gpu_thread.TickGPU(); + WaitForSyncOperation(wait_fence); + } + GPU& gpu; Core::System& system; + Host1x::Host1x& host1x; std::map> cdma_pushers; std::unique_ptr renderer; @@ -411,18 +405,11 @@ struct GPU::Impl { std::condition_variable sync_cv; - struct FlushRequest { - explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) - : fence{fence_}, addr{addr_}, size{size_} {} - u64 fence; - VAddr addr; - std::size_t size; - }; - - std::list flush_requests; - std::atomic current_flush_fence{}; - u64 last_flush_fence{}; - std::mutex flush_request_mutex; + std::list> sync_requests; + std::atomic current_sync_fence{}; + u64 last_sync_fence{}; + std::mutex sync_request_mutex; + std::condition_variable sync_request_cv; const bool is_async; @@ -433,6 +420,10 @@ struct GPU::Impl { std::unordered_map> channels; Tegra::Control::ChannelState* current_channel; s32 bound_channel{-1}; + + std::deque free_swap_counters; + std::deque request_swap_counters; + std::mutex request_swap_mutex; }; GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) @@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() { } u64 GPU::RequestFlush(VAddr addr, std::size_t size) { - return impl->RequestFlush(addr, size); + return impl->RequestSyncOperation( + [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); +} + +u64 GPU::CurrentSyncRequestFence() const { + return impl->CurrentSyncRequestFence(); } -u64 GPU::CurrentFlushRequestFence() const { - return impl->CurrentFlushRequestFence(); +void GPU::WaitForSyncOperation(u64 fence) { + return impl->WaitForSyncOperation(fence); } void GPU::TickWork() { impl->TickWork(); } +/// Gets a mutable reference to the Host1x interface +Host1x::Host1x& GPU::Host1x() { + return impl->host1x; +} + +/// Gets an immutable reference to the Host1x interface. +const Host1x::Host1x& GPU::Host1x() const { + return impl->host1x; +} + Engines::Maxwell3D& GPU::Maxwell3D() { return impl->Maxwell3D(); } @@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { return impl->ShaderNotify(); } +void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, + Service::Nvidia::NvFence* fences, size_t num_fences) { + impl->RequestSwapBuffers(framebuffer, fences, num_fences); +} + void GPU::WaitFence(u32 syncpoint_id, u32 value) { impl->WaitFence(syncpoint_id, value); } @@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { impl->RegisterSyncptInterrupt(syncpoint_id, value); } -bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { - return impl->CancelSyncptInterrupt(syncpoint_id, value); -} - u64 GPU::GetTicks() const { return impl->GetTicks(); } -- cgit v1.2.3