summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/gpu.cpp22
-rw-r--r--src/video_core/gpu.h21
-rw-r--r--src/video_core/gpu_thread.cpp17
-rw-r--r--src/video_core/gpu_thread.h8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp4
6 files changed, 70 insertions, 8 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 19d3bd305..85a6c7bb5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true;
}
+u64 GPU::RequestFlush(CacheAddr addr, std::size_t size) {
+ std::unique_lock lck{flush_request_mutex};
+ const u64 fence = ++last_flush_fence;
+ flush_requests.emplace_back(fence, addr, size);
+ return fence;
+}
+
+void GPU::TickWork() {
+ std::unique_lock lck{flush_request_mutex};
+ while (!flush_requests.empty()) {
+ auto& request = flush_requests.front();
+ const u64 fence = request.fence;
+ const CacheAddr addr = request.addr;
+ const std::size_t size = request.size;
+ flush_requests.pop_front();
+ flush_request_mutex.unlock();
+ renderer->Rasterizer().FlushRegion(addr, size);
+ current_flush_fence.store(fence);
+ flush_request_mutex.lock();
+ }
+}
+
u64 GPU::GetTicks() const {
// This values were reversed engineered by fincs from NVN
// The gpu clock is reported in units of 385/625 nanoseconds
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fa9991c87..943a5b110 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -159,6 +159,14 @@ public:
void SyncGuestHost();
virtual void OnCommandListEnd();
+ u64 RequestFlush(CacheAddr addr, std::size_t size);
+
+ u64 CurrentFlushRequestFence() const {
+ return current_flush_fence.load(std::memory_order_relaxed);
+ }
+
+ void TickWork();
+
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
@@ -327,6 +335,19 @@ private:
std::condition_variable sync_cv;
+ struct FlushRequest {
+ FlushRequest(u64 fence, CacheAddr addr, std::size_t size)
+ : fence{fence}, addr{addr}, size{size} {}
+ u64 fence;
+ CacheAddr addr;
+ std::size_t size;
+ };
+
+ std::list<FlushRequest> flush_requests;
+ std::atomic<u64> current_flush_fence{};
+ u64 last_flush_fence{};
+ std::mutex flush_request_mutex;
+
const bool is_async;
};
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3e2be00e9..9460364a3 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -15,8 +15,9 @@
namespace VideoCommon::GPUThread {
/// Runs the GPU thread
-static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher, SynchState& state) {
+static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
+ Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
+ SynchState& state) {
MicroProfileOnThreadCreate("GpuThread");
// Wait for first GPU command before acquiring the window context
@@ -40,6 +41,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
renderer.Rasterizer().ReleaseFences();
+ } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
+ system.GPU().TickWork();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -68,8 +71,8 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher) {
- thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
- std::ref(state)};
+ thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
+ std::ref(context), std::ref(dma_pusher), std::ref(state)};
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -85,8 +88,10 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
return;
}
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
- u64 fence = PushCommand(FlushRegionCommand(addr, size));
- while (fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+ auto& gpu = system.GPU();
+ u64 fence = gpu.RequestFlush(addr, size);
+ PushCommand(GPUTickCommand());
+ while (fence > gpu.CurrentFlushRequestFence()) {
}
}
}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 9d0877921..5a28335d6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,12 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
u64 size;
};
-/// Command to signal to the GPU thread that processing has ended
+/// Command called within the gpu, to schedule actions after a command list end
struct OnCommandListEndCommand final {};
+/// Command to make the gpu look into pending requests
+struct GPUTickCommand final {};
+
using CommandData =
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
- InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
+ InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
+ GPUTickCommand>;
struct CommandDataContainer {
CommandDataContainer() = default;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e52e5961f..fbd81b895 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -601,6 +601,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
EndTransformFeedback();
++num_queued_commands;
+
+ system.GPU().TickWork();
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -628,6 +630,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
+ system.GPU().TickWork();
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -652,6 +655,9 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+ if (!Settings::IsGPULevelExtreme()) {
+ return buffer_cache.MustFlushRegion(addr, size);
+ }
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 507262c8f..926ecf38e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -365,6 +365,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
});
EndTransformFeedback();
+
+ system.GPU().TickWork();
}
void RasterizerVulkan::Clear() {
@@ -492,6 +494,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
descriptor_set, {});
cmdbuf.Dispatch(grid_x, grid_y, grid_z);
});
+
+ system.GPU().TickWork();
}
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {