summaryrefslogtreecommitdiffstats
path: root/src/video_core/gpu.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/gpu.cpp')
-rw-r--r--src/video_core/gpu.cpp211
1 files changed, 163 insertions, 48 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8acf2eda2..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <chrono>
+
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
@@ -9,6 +11,7 @@
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
+#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
@@ -17,26 +20,36 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
+#include "video_core/shader_notify.h"
#include "video_core/video_core.h"
namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
- : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
- auto& rasterizer{renderer->Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
- dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
- maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
- fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
- kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
- maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
- kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
-}
+GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
+ : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+ dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+ cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
+ maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
+ fermi_2d{std::make_unique<Engines::Fermi2D>()},
+ kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
+ maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
+ kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+ shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
GPU::~GPU() = default;
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+ renderer = std::move(renderer_);
+
+ VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
+ memory_manager->BindRasterizer(rasterizer);
+ maxwell_3d->BindRasterizer(rasterizer);
+ fermi_2d->BindRasterizer(rasterizer);
+ kepler_compute->BindRasterizer(rasterizer);
+}
+
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
@@ -65,10 +78,18 @@ DmaPusher& GPU::DmaPusher() {
return *dma_pusher;
}
+Tegra::CDmaPusher& GPU::CDmaPusher() {
+ return *cdma_pusher;
+}
+
const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
+const Tegra::CDmaPusher& GPU::CDmaPusher() const {
+ return *cdma_pusher;
+}
+
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
@@ -76,7 +97,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
}
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
+ sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
@@ -125,14 +146,38 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true;
}
+u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
+ std::unique_lock lck{flush_request_mutex};
+ const u64 fence = ++last_flush_fence;
+ flush_requests.emplace_back(fence, addr, size);
+ return fence;
+}
+
+void GPU::TickWork() {
+ std::unique_lock lck{flush_request_mutex};
+ while (!flush_requests.empty()) {
+ auto& request = flush_requests.front();
+ const u64 fence = request.fence;
+ const VAddr addr = request.addr;
+ const std::size_t size = request.size;
+ flush_requests.pop_front();
+ flush_request_mutex.unlock();
+ renderer->Rasterizer().FlushRegion(addr, size);
+ current_flush_fence.store(fence);
+ flush_request_mutex.lock();
+ }
+}
+
u64 GPU::GetTicks() const {
// This values were reversed engineered by fincs from NVN
// The gpu clock is reported in units of 385/625 nanoseconds
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
- const u64 cpu_ticks = system.CoreTiming().GetTicks();
- const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+ u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
+ if (Settings::values.use_fast_gpu_time.GetValue()) {
+ nanoseconds /= 256;
+ }
const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
@@ -142,30 +187,13 @@ void GPU::FlushCommands() {
renderer->Rasterizer().FlushCommands();
}
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
- BindObject = 0x0,
- Nop = 0x2,
- SemaphoreAddressHigh = 0x4,
- SemaphoreAddressLow = 0x5,
- SemaphoreSequence = 0x6,
- SemaphoreTrigger = 0x7,
- NotifyIntr = 0x8,
- WrcacheFlush = 0x9,
- Unk28 = 0xA,
- UnkCacheFlush = 0xB,
- RefCnt = 0x14,
- SemaphoreAcquire = 0x1A,
- SemaphoreRelease = 0x1B,
- FenceValue = 0x1C,
- FenceAction = 0x1D,
- Unk78 = 0x1E,
- Unk7c = 0x1F,
- Yield = 0x20,
- NonPullerMethods = 0x40,
-};
+void GPU::SyncGuestHost() {
+ renderer->Rasterizer().SyncGuestHost();
+}
+
+void GPU::OnCommandListEnd() {
+ renderer->Rasterizer().ReleaseFences();
+}
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
@@ -180,16 +208,32 @@ void GPU::CallMethod(const MethodCall& method_call) {
ASSERT(method_call.subchannel < bound_engines.size());
- if (ExecuteMethodOnEngine(method_call)) {
+ if (ExecuteMethodOnEngine(method_call.method)) {
CallEngineMethod(method_call);
} else {
CallPullerMethod(method_call);
}
}
-bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
- const auto method = static_cast<BufferMethods>(method_call.method);
- return method >= BufferMethods::NonPullerMethods;
+void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending) {
+ LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+
+ ASSERT(subchannel < bound_engines.size());
+
+ if (ExecuteMethodOnEngine(method)) {
+ CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+ } else {
+ for (std::size_t i = 0; i < amount; i++) {
+ CallPullerMethod(
+ {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)});
+ }
+ }
+}
+
+bool GPU::ExecuteMethodOnEngine(u32 method) {
+ const auto buffer_method = static_cast<BufferMethods>(method);
+ return buffer_method >= BufferMethods::NonPullerMethods;
}
void GPU::CallPullerMethod(const MethodCall& method_call) {
@@ -209,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
+ break;
case BufferMethods::FenceAction:
+ ProcessFenceActionMethod();
+ break;
+ case BufferMethods::WaitForInterrupt:
+ ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
@@ -250,19 +299,46 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
switch (engine) {
case EngineID::FERMI_TWOD_A:
- fermi_2d->CallMethod(method_call);
+ fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+ break;
+ case EngineID::MAXWELL_B:
+ maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ kepler_compute->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ kepler_memory->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine");
+ }
+}
+
+void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending) {
+ const EngineID engine = bound_engines[subchannel];
+
+ switch (engine) {
+ case EngineID::FERMI_TWOD_A:
+ fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_B:
- maxwell_3d->CallMethod(method_call);
+ maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_COMPUTE_B:
- kepler_compute->CallMethod(method_call);
+ kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->CallMethod(method_call);
+ maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- kepler_memory->CallMethod(method_call);
+ kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
@@ -273,7 +349,46 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
- bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
+ const auto engine_id = static_cast<EngineID>(method_call.argument);
+ bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
+ switch (engine_id) {
+ case EngineID::FERMI_TWOD_A:
+ dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
+ break;
+ case EngineID::MAXWELL_B:
+ dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id));
+ }
+}
+
+void GPU::ProcessFenceActionMethod() {
+ switch (regs.fence_action.op) {
+ case FenceOperation::Acquire:
+ WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+ break;
+ case FenceOperation::Increment:
+ IncrementSyncPoint(regs.fence_action.syncpoint_id);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation {}",
+ static_cast<u32>(regs.fence_action.op.Value()));
+ }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+ // TODO(bunnei) ImplementMe
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
}
void GPU::ProcessSemaphoreTriggerMethod() {