summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: bunnei <bunneidev@gmail.com> 2018-11-28 01:17:33 +0100
committer: bunnei <bunneidev@gmail.com> 2018-11-28 01:17:33 +0100
commit: ac74b71d7530452126792c5fa0bf01fe7378ba00 (patch)
tree: 7db6044f15ded8659aff9fd822d41139c495e171
parent: gpu: Move command list profiling to DmaPusher::DispatchCalls. (diff)
download: yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar
yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar.gz
yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar.bz2
yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar.lz
yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar.xz
yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar.zst
yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.zip
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 14
-rw-r--r-- src/video_core/dma_pusher.cpp | 12
-rw-r--r-- src/video_core/dma_pusher.h | 10
3 files changed, 23 insertions, 13 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 39a58b685..2e2b0ae1c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -128,11 +128,9 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
return 0;
}
-static void PushGPUEntries(const std::vector<Tegra::CommandListHeader>& entries) {
+static void PushGPUEntries(Tegra::CommandList&& entries) {
auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
- for (const auto& entry : entries) {
- dma_pusher.Push(entry);
- }
+ dma_pusher.Push(std::move(entries));
dma_pusher.DispatchCalls();
}
@@ -149,11 +147,11 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
params.num_entries * sizeof(Tegra::CommandListHeader),
"Incorrect input size");
- std::vector<Tegra::CommandListHeader> entries(params.num_entries);
+ Tegra::CommandList entries(params.num_entries);
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
params.num_entries * sizeof(Tegra::CommandListHeader));
- PushGPUEntries(entries);
+ PushGPUEntries(std::move(entries));
params.fence_out.id = 0;
params.fence_out.value = 0;
@@ -170,11 +168,11 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
params.address, params.num_entries, params.flags);
- std::vector<Tegra::CommandListHeader> entries(params.num_entries);
+ Tegra::CommandList entries(params.num_entries);
Memory::ReadBlock(params.address, entries.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
- PushGPUEntries(entries);
+ PushGPUEntries(std::move(entries));
params.fence_out.id = 0;
params.fence_out.value = 0;
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 23ec97944..63a958f11 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -23,6 +23,8 @@ void DmaPusher::DispatchCalls() {
// On entering GPU code, assume all memory may be touched by the ARM core.
gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
+ dma_pushbuffer_subindex = 0;
+
while (Core::System::GetInstance().IsPoweredOn()) {
if (!Step()) {
break;
@@ -89,11 +91,17 @@ bool DmaPusher::Step() {
}
} else if (ib_enable && !dma_pushbuffer.empty()) {
// Current pushbuffer empty, but we have more IB entries to read
- const CommandListHeader& command_list_header{dma_pushbuffer.front()};
+ const CommandList& command_list{dma_pushbuffer.front()};
+ const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
dma_get = command_list_header.addr;
dma_put = dma_get + command_list_header.size * sizeof(u32);
non_main = command_list_header.is_non_main;
- dma_pushbuffer.pop();
+
+ if (dma_pushbuffer_subindex >= command_list.size()) {
+ // We've gone through the current list, remove it from the queue
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ }
} else {
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
return {};
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 39d98e46e..16e0697c4 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -4,6 +4,7 @@
#pragma once
+#include <vector>
#include <queue>
#include "common/bit_field.h"
@@ -45,6 +46,8 @@ static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect
class GPU;
+using CommandList = std::vector<Tegra::CommandListHeader>;
+
/**
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
* emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
@@ -57,8 +60,8 @@ public:
explicit DmaPusher(GPU& gpu);
~DmaPusher();
- void Push(const CommandListHeader& command_list_header) {
- dma_pushbuffer.push(command_list_header);
+ void Push(CommandList&& entries) {
+ dma_pushbuffer.push(std::move(entries));
}
void DispatchCalls();
@@ -72,7 +75,8 @@ private:
GPU& gpu;
- std::queue<CommandListHeader> dma_pushbuffer;
+ std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
+ std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
struct DmaState {
u32 method; ///< Current method