From bc8b3d225eda388f0603830cbff8357893abb0f9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 6 Feb 2022 01:16:11 +0100 Subject: VideoCore: Refactor fencing system. --- .../hle/service/nvdrv/devices/nvdisp_disp0.cpp | 5 +- src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | 3 +- src/core/hle/service/nvflinger/nvflinger.cpp | 15 +--- src/video_core/buffer_cache/buffer_cache.h | 13 +++ src/video_core/dma_pusher.cpp | 3 - src/video_core/engines/maxwell_3d.cpp | 24 +++++- src/video_core/engines/puller.cpp | 39 ++++++--- src/video_core/fence_manager.h | 96 +++++++++------------- src/video_core/gpu.cpp | 17 ++-- src/video_core/gpu.h | 4 +- src/video_core/gpu_thread.cpp | 2 +- src/video_core/rasterizer_interface.h | 7 +- .../renderer_opengl/gl_fence_manager.cpp | 13 +-- src/video_core/renderer_opengl/gl_fence_manager.h | 6 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 22 ++--- src/video_core/renderer_opengl/gl_rasterizer.h | 5 +- .../renderer_vulkan/vk_fence_manager.cpp | 15 +--- src/video_core/renderer_vulkan/vk_fence_manager.h | 6 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 21 ++--- src/video_core/renderer_vulkan/vk_rasterizer.h | 5 +- 20 files changed, 154 insertions(+), 167 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index e6a976714..18c5324a9 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -40,7 +40,8 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, u32 stride, android::BufferTransformFlags transform, - const Common::Rectangle& crop_rect) { + const Common::Rectangle& crop_rect, + std::array& fences, u32 num_fences) { const VAddr addr = nvmap.GetHandleAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 
@@ -50,7 +51,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form stride, format, transform, crop_rect}; system.GetPerfStats().EndSystemFrame(); - system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0); + system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); system.GetPerfStats().BeginSystemFrame(); } diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 1ca9b2e74..04217ab12 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -38,7 +38,8 @@ public: /// Performs a screen flip, drawing the buffer pointed to by the handle. void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, u32 stride, android::BufferTransformFlags transform, - const Common::Rectangle& crop_rect); + const Common::Rectangle& crop_rect, + std::array& fences, u32 num_fences); Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index aa112021d..4658f1e8b 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -269,17 +269,6 @@ void NVFlinger::Compose() { return; // We are likely shutting down } - auto& syncpoint_manager = system.Host1x().GetSyncpointManager(); - const auto& multi_fence = buffer.fence; - guard->unlock(); - for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { - const auto& fence = multi_fence.fences[fence_id]; - syncpoint_manager.WaitGuest(fence.id, fence.value); - } - guard->lock(); - - MicroProfileFlip(); - // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. 
The display device selection is probably based // on which display we're drawing (Default, Internal, External, etc) @@ -293,8 +282,10 @@ void NVFlinger::Compose() { nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), - static_cast(buffer.transform), crop_rect); + static_cast(buffer.transform), crop_rect, + buffer.fence.fences, buffer.fence.num_fences); + MicroProfileFlip(); guard->lock(); swap_interval = buffer.swap_interval; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6b6764d72..e55cac0d6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -826,6 +826,19 @@ void BufferCache

::CommitAsyncFlushesHigh() { const bool is_accuracy_normal = Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; + auto it = committed_ranges.begin(); + while (it != committed_ranges.end()) { + auto& current_intervals = *it; + auto next_it = std::next(it); + while (next_it != committed_ranges.end()) { + for (auto& interval : *next_it) { + current_intervals.subtract(interval); + } + next_it++; + } + it++; + } + boost::container::small_vector, 1> downloads; u64 total_size_bytes = 0; u64 largest_copy = 0; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index b01f04d0c..9835e3ac1 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -24,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, void DmaPusher::DispatchCalls() { MICROPROFILE_SCOPE(DispatchCalls); - gpu.SyncGuestHost(); - dma_pushbuffer_subindex = 0; dma_state.is_last_call = true; @@ -36,7 +34,6 @@ void DmaPusher::DispatchCalls() { } } gpu.FlushCommands(); - gpu.SyncGuestHost(); gpu.OnCommandListEnd(); } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 3a4646289..950c70dcd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -242,6 +242,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return; case MAXWELL3D_REG_INDEX(fragment_barrier): return rasterizer->FragmentBarrier(); + case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache): + rasterizer->InvalidateGPUCache(); + return rasterizer->WaitForIdle(); case MAXWELL3D_REG_INDEX(tiled_cache_barrier): return rasterizer->TiledCacheBarrier(); } @@ -472,10 +475,25 @@ void Maxwell3D::ProcessQueryGet() { switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: - if (regs.query.query_get.fence == 1) { - rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); + if 
(regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) { + const GPUVAddr sequence_address{regs.query.QueryAddress()}; + const u32 payload = regs.query.query_sequence; + std::function operation([this, sequence_address, payload] { + memory_manager.Write(sequence_address, payload); + }); + rasterizer->SignalFence(std::move(operation)); } else { - StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); + struct LongQueryResult { + u64_le value; + u64_le timestamp; + }; + const GPUVAddr sequence_address{regs.query.QueryAddress()}; + const u32 payload = regs.query.query_sequence; + std::function operation([this, sequence_address, payload] { + LongQueryResult query_result{payload, system.GPU().GetTicks()}; + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + }); + rasterizer->SignalFence(std::move(operation)); } break; case Regs::QueryOperation::Acquire: diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 8c17639e4..dd9494efa 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -79,12 +79,15 @@ void Puller::ProcessSemaphoreTriggerMethod() { u64 timestamp; }; - Block block{}; - block.sequence = regs.semaphore_sequence; - // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of - // CoreTiming - block.timestamp = gpu.GetTicks(); - memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); + const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; + const u32 payload = regs.semaphore_sequence; + std::function operation([this, sequence_address, payload] { + Block block{}; + block.sequence = payload; + block.timestamp = gpu.GetTicks(); + memory_manager.WriteBlock(sequence_address, &block, sizeof(block)); + }); + rasterizer->SignalFence(std::move(operation)); } else { do { const u32 word{memory_manager.Read(regs.semaphore_address.SemaphoreAddress())}; 
@@ -94,6 +97,7 @@ void Puller::ProcessSemaphoreTriggerMethod() { regs.acquire_active = true; regs.acquire_mode = false; if (word != regs.acquire_value) { + rasterizer->ReleaseFences(); std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } @@ -101,11 +105,13 @@ void Puller::ProcessSemaphoreTriggerMethod() { regs.acquire_active = true; regs.acquire_mode = true; if (word < regs.acquire_value) { + rasterizer->ReleaseFences(); std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } else if (op == GpuSemaphoreOperation::AcquireMask) { - if (word & regs.semaphore_sequence == 0) { + if ((word & regs.semaphore_sequence) == 0) { + rasterizer->ReleaseFences(); std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } @@ -117,16 +123,23 @@ void Puller::ProcessSemaphoreTriggerMethod() { } void Puller::ProcessSemaphoreRelease() { - rasterizer->SignalSemaphore(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); + const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; + const u32 payload = regs.semaphore_release; + std::function operation([this, sequence_address, payload] { + memory_manager.Write(sequence_address, payload); + }); + rasterizer->SignalFence(std::move(operation)); } void Puller::ProcessSemaphoreAcquire() { - const u32 word = memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); + u32 word = memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); const auto value = regs.semaphore_acquire; - std::this_thread::sleep_for(std::chrono::milliseconds(5)); - if (word != value) { + while (word != value) { regs.acquire_active = true; regs.acquire_value = value; + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + rasterizer->ReleaseFences(); + word = memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); // TODO(kemathe73) figure out how to do the acquire_timeout regs.acquire_mode = false; regs.acquire_source = false; @@ -147,9 +160,9 @@ void 
Puller::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::SemaphoreAddressHigh: case BufferMethods::SemaphoreAddressLow: case BufferMethods::SemaphoreSequencePayload: - case BufferMethods::WrcacheFlush: case BufferMethods::SyncpointPayload: break; + case BufferMethods::WrcacheFlush: case BufferMethods::RefCnt: rasterizer->SignalReference(); break; @@ -173,7 +186,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { } case BufferMethods::MemOpB: { // Implement this better. - rasterizer->SyncGuestHost(); + rasterizer->InvalidateGPUCache(); break; } case BufferMethods::MemOpC: diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 03a70e5e0..c390ac91b 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include @@ -19,28 +21,7 @@ namespace VideoCommon { class FenceBase { public: - explicit FenceBase(u32 payload_, bool is_stubbed_) - : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} - - explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_) - : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} - - u8* GetAddress() const { - return address; - } - - u32 GetPayload() const { - return payload; - } - - bool IsSemaphore() const { - return is_semaphore; - } - -private: - u8* address; - u32 payload; - bool is_semaphore; + explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} protected: bool is_stubbed; @@ -60,31 +41,28 @@ public: buffer_cache.AccumulateFlushes(); } - void SignalSemaphore(u8* addr, u32 value) { + void SyncOperation(std::function&& func) { + uncommitted_operations.emplace_back(std::move(func)); + } + + void SignalFence(std::function&& func) { TryReleasePendingFences(); const bool should_flush = ShouldFlush(); CommitAsyncFlushes(); - TFence new_fence = CreateFence(addr, value, !should_flush); + 
uncommitted_operations.emplace_back(std::move(func)); + CommitOperations(); + TFence new_fence = CreateFence(!should_flush); fences.push(new_fence); QueueFence(new_fence); if (should_flush) { rasterizer.FlushCommands(); } - rasterizer.SyncGuestHost(); } void SignalSyncPoint(u32 value) { syncpoint_manager.IncrementGuest(value); - TryReleasePendingFences(); - const bool should_flush = ShouldFlush(); - CommitAsyncFlushes(); - TFence new_fence = CreateFence(value, !should_flush); - fences.push(new_fence); - QueueFence(new_fence); - if (should_flush) { - rasterizer.FlushCommands(); - } - rasterizer.SyncGuestHost(); + std::function func([this, value] { syncpoint_manager.IncrementHost(value); }); + SignalFence(std::move(func)); } void WaitPendingFences() { @@ -94,12 +72,10 @@ public: WaitFence(current_fence); } PopAsyncFlushes(); - if (current_fence->IsSemaphore()) { - char* address = reinterpret_cast(current_fence->GetAddress()); - auto payload = current_fence->GetPayload(); - std::memcpy(address, &payload, sizeof(payload)); - } else { - syncpoint_manager.IncrementHost(current_fence->GetPayload()); + auto operations = std::move(pending_operations.front()); + pending_operations.pop_front(); + for (auto& operation : operations) { + operation(); } PopFence(); } @@ -114,11 +90,9 @@ protected: virtual ~FenceManager() = default; - /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is + /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is /// true - virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; - /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true - virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0; + virtual TFence CreateFence(bool is_stubbed) = 0; /// Queues a fence into the backend if the fence isn't stubbed. virtual void QueueFence(TFence& fence) = 0; /// Notifies that the backend fence has been signaled/reached in host GPU. 
@@ -141,12 +115,10 @@ private: return; } PopAsyncFlushes(); - if (current_fence->IsSemaphore()) { - char* address = reinterpret_cast(current_fence->GetAddress()); - const auto payload = current_fence->GetPayload(); - std::memcpy(address, &payload, sizeof(payload)); - } else { - syncpoint_manager.IncrementHost(current_fence->GetPayload()); + auto operations = std::move(pending_operations.front()); + pending_operations.pop_front(); + for (auto& operation : operations) { + operation(); } PopFence(); } @@ -165,16 +137,20 @@ private: } void PopAsyncFlushes() { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - texture_cache.PopAsyncFlushes(); - buffer_cache.PopAsyncFlushes(); + { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + texture_cache.PopAsyncFlushes(); + buffer_cache.PopAsyncFlushes(); + } query_cache.PopAsyncFlushes(); } void CommitAsyncFlushes() { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - texture_cache.CommitAsyncFlushes(); - buffer_cache.CommitAsyncFlushes(); + { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + texture_cache.CommitAsyncFlushes(); + buffer_cache.CommitAsyncFlushes(); + } query_cache.CommitAsyncFlushes(); } @@ -183,7 +159,13 @@ private: fences.pop(); } + void CommitOperations() { + pending_operations.emplace_back(std::move(uncommitted_operations)); + } + std::queue fences; + std::deque> uncommitted_operations; + std::deque>> pending_operations; DelayedDestructionRing delayed_destruction_ring; }; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index a1d19b1c8..d7a3dd96b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -93,16 +93,13 @@ struct GPU::Impl { } /// Synchronizes CPU writes with Host GPU memory. - void SyncGuestHost() { - rasterizer->SyncGuestHost(); + void InvalidateGPUCache() { + rasterizer->InvalidateGPUCache(); } /// Signal the ending of command list. 
void OnCommandListEnd() { - if (is_async) { - // This command only applies to asynchronous GPU mode - gpu_thread.OnCommandListEnd(); - } + gpu_thread.OnCommandListEnd(); } /// Request a host GPU memory flush from the CPU. @@ -296,7 +293,7 @@ struct GPU::Impl { } void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - Service::Nvidia::NvFence* fences, size_t num_fences) { + std::array& fences, size_t num_fences) { size_t current_request_counter{}; { std::unique_lock lk(request_swap_mutex); @@ -412,8 +409,8 @@ void GPU::FlushCommands() { impl->FlushCommands(); } -void GPU::SyncGuestHost() { - impl->SyncGuestHost(); +void GPU::InvalidateGPUCache() { + impl->InvalidateGPUCache(); } void GPU::OnCommandListEnd() { @@ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { } void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - Service::Nvidia::NvFence* fences, size_t num_fences) { + std::array& fences, size_t num_fences) { impl->RequestSwapBuffers(framebuffer, fences, num_fences); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 655373b33..0a4a8b14f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -110,7 +110,7 @@ public: /// Flush all current written commands into the host GPU for execution. void FlushCommands(); /// Synchronizes CPU writes with Host GPU memory. - void SyncGuestHost(); + void InvalidateGPUCache(); /// Signal the ending of command list. void OnCommandListEnd(); @@ -180,7 +180,7 @@ public: void RendererFrameEndNotify(); void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - Service::Nvidia::NvFence* fences, size_t num_fences); + std::array& fences, size_t num_fences); /// Performs any additional setup necessary in order to begin GPU emulation. 
/// This can be used to launch any necessary threads and register any necessary diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 2c03545bf..1bd477011 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -98,7 +98,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { } void ThreadManager::TickGPU() { - PushCommand(GPUTickCommand(), true); + PushCommand(GPUTickCommand()); } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5362aafb6..cb07f3d38 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -62,7 +62,10 @@ public: virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0; /// Signal a GPU based semaphore as a fence - virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; + virtual void SignalFence(std::function&& func) = 0; + + /// Send an operation to be done after a certain amount of flushes. + virtual void SyncOperation(std::function&& func) = 0; /// Signal a GPU based syncpoint as a fence virtual void SignalSyncPoint(u32 value) = 0; @@ -89,7 +92,7 @@ public: virtual void OnCPUWrite(VAddr addr, u64 size) = 0; /// Sync memory between guest and host. 
- virtual void SyncGuestHost() = 0; + virtual void InvalidateGPUCache() = 0; /// Unmap memory range virtual void UnmapMemory(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index c76446b60..91463f854 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -10,10 +10,7 @@ namespace OpenGL { -GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} - -GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_) - : FenceBase{address_, payload_, is_stubbed_} {} +GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {} GLInnerFence::~GLInnerFence() = default; @@ -48,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize BufferCache& buffer_cache_, QueryCache& query_cache_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} -Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { - return std::make_shared(value, is_stubbed); -} - -Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) { - return std::make_shared(addr, value, is_stubbed); +Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) { + return std::make_shared(is_stubbed); } void FenceManagerOpenGL::QueueFence(Fence& fence) { diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index fced8d002..f1446e732 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -16,8 +16,7 @@ namespace OpenGL { class GLInnerFence : public VideoCommon::FenceBase { public: - explicit GLInnerFence(u32 payload_, bool is_stubbed_); - explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_); + explicit GLInnerFence(bool is_stubbed_); ~GLInnerFence(); void 
Queue(); @@ -40,8 +39,7 @@ public: QueryCache& query_cache); protected: - Fence CreateFence(u32 value, bool is_stubbed) override; - Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; + Fence CreateFence(bool is_stubbed) override; void QueueFence(Fence& fence) override; bool IsFenceSignaled(Fence& fence) const override; void WaitFence(Fence& fence) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b572950a6..6ebd6cff9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -358,7 +358,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { } } -void RasterizerOpenGL::SyncGuestHost() { +void RasterizerOpenGL::InvalidateGPUCache() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); shader_cache.SyncGuestHost(); { @@ -386,13 +386,12 @@ void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { } } -void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { - if (!gpu.IsAsync()) { - gpu_memory->Write(addr, value); - return; - } - auto paddr = gpu_memory->GetPointer(addr); - fence_manager.SignalSemaphore(paddr, value); +void RasterizerOpenGL::SignalFence(std::function&& func) { + fence_manager.SignalFence(std::move(func)); +} + +void RasterizerOpenGL::SyncOperation(std::function&& func) { + fence_manager.SyncOperation(std::move(func)); } void RasterizerOpenGL::SignalSyncPoint(u32 value) { @@ -400,16 +399,10 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) { } void RasterizerOpenGL::SignalReference() { - if (!gpu.IsAsync()) { - return; - } fence_manager.SignalOrdering(); } void RasterizerOpenGL::ReleaseFences() { - if (!gpu.IsAsync()) { - return; - } fence_manager.WaitPendingFences(); } @@ -426,6 +419,7 @@ void RasterizerOpenGL::WaitForIdle() { } void RasterizerOpenGL::FragmentBarrier() { + glTextureBarrier(); glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | 
GL_TEXTURE_FETCH_BARRIER_BIT); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d469075a1..fe0ba979a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -80,10 +80,11 @@ public: bool MustFlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; - void SyncGuestHost() override; + void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; - void SignalSemaphore(GPUVAddr addr, u32 value) override; + void SignalFence(std::function&& func) override; + void SyncOperation(std::function&& func) override; void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 301cbbabe..0214b103a 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -11,11 +11,8 @@ namespace Vulkan { -InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) - : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} - -InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_) - : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} +InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_) + : FenceBase{is_stubbed_}, scheduler{scheduler_} {} InnerFence::~InnerFence() = default; @@ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, scheduler{scheduler_} {} -Fence FenceManager::CreateFence(u32 value, bool is_stubbed) { - return 
std::make_shared(scheduler, value, is_stubbed); -} - -Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) { - return std::make_shared(scheduler, addr, value, is_stubbed); +Fence FenceManager::CreateFence(bool is_stubbed) { + return std::make_shared(scheduler, is_stubbed); } void FenceManager::QueueFence(Fence& fence) { diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index ea9e88052..7fe2afcd9 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -25,8 +25,7 @@ class Scheduler; class InnerFence : public VideoCommon::FenceBase { public: - explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); - explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_); + explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_); ~InnerFence(); void Queue(); @@ -50,8 +49,7 @@ public: QueryCache& query_cache, const Device& device, Scheduler& scheduler); protected: - Fence CreateFence(u32 value, bool is_stubbed) override; - Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; + Fence CreateFence(bool is_stubbed) override; void QueueFence(Fence& fence) override; bool IsFenceSignaled(Fence& fence) const override; void WaitFence(Fence& fence) override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d7b57e0f3..a35e41199 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -428,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { } } -void RasterizerVulkan::SyncGuestHost() { +void RasterizerVulkan::InvalidateGPUCache() { pipeline_cache.SyncGuestHost(); { std::scoped_lock lock{buffer_cache.mutex}; @@ -455,13 +455,12 @@ void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { } } -void 
RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { - if (!gpu.IsAsync()) { - gpu_memory->Write(addr, value); - return; - } - auto paddr = gpu_memory->GetPointer(addr); - fence_manager.SignalSemaphore(paddr, value); +void RasterizerVulkan::SignalFence(std::function&& func) { + fence_manager.SignalFence(std::move(func)); +} + +void RasterizerVulkan::SyncOperation(std::function&& func) { + fence_manager.SyncOperation(std::move(func)); } void RasterizerVulkan::SignalSyncPoint(u32 value) { @@ -469,16 +468,10 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { } void RasterizerVulkan::SignalReference() { - if (!gpu.IsAsync()) { - return; - } fence_manager.SignalOrdering(); } void RasterizerVulkan::ReleaseFences() { - if (!gpu.IsAsync()) { - return; - } fence_manager.WaitPendingFences(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c836158b8..fb9e83e8f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -76,10 +76,11 @@ public: bool MustFlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; - void SyncGuestHost() override; + void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; - void SignalSemaphore(GPUVAddr addr, u32 value) override; + void SignalFence(std::function&& func) override; + void SyncOperation(std::function&& func) override; void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; -- cgit v1.2.3