diff options
Diffstat (limited to 'src/video_core/gpu.cpp')
-rw-r--r-- | src/video_core/gpu.cpp | 211 |
1 files changed, 163 insertions, 48 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8acf2eda2..ebd149c3a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> + #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" @@ -9,6 +11,7 @@ #include "core/core_timing_util.h" #include "core/frontend/emu_window.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" @@ -17,26 +20,36 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/shader_notify.h" #include "video_core/video_core.h" namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async) - : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { - auto& rasterizer{renderer->Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); - dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); - maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); - fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); - kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); - maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); - kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); -} +GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) + : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, + dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, + cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_}, + maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, + fermi_2d{std::make_unique<Engines::Fermi2D>()}, + kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, + maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, + kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, + shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} GPU::~GPU() = default; +void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { + renderer = std::move(renderer_); + + VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer(); + memory_manager->BindRasterizer(rasterizer); + maxwell_3d->BindRasterizer(rasterizer); + fermi_2d->BindRasterizer(rasterizer); + kepler_compute->BindRasterizer(rasterizer); +} + Engines::Maxwell3D& GPU::Maxwell3D() { return *maxwell_3d; } @@ -65,10 +78,18 @@ DmaPusher& GPU::DmaPusher() { return *dma_pusher; } +Tegra::CDmaPusher& GPU::CDmaPusher() { + return *cdma_pusher; +} + const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +const Tegra::CDmaPusher& GPU::CDmaPusher() const { + return *cdma_pusher; +} + void GPU::WaitFence(u32 syncpoint_id, u32 value) { // Synced GPU, is always in sync if (!is_async) { @@ -76,7 +97,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) { } MICROPROFILE_SCOPE(GPU_wait); std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; }); + sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; }); } void GPU::IncrementSyncPoint(const u32 syncpoint_id) { @@ -125,14 +146,38 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +u64 GPU::RequestFlush(VAddr addr, std::size_t size) { + std::unique_lock lck{flush_request_mutex}; + const u64 fence = ++last_flush_fence; + flush_requests.emplace_back(fence, addr, size); + return fence; +} + +void GPU::TickWork() { + std::unique_lock lck{flush_request_mutex}; + while (!flush_requests.empty()) { + auto& request = flush_requests.front(); + const u64 fence = request.fence; + const VAddr addr = request.addr; + const std::size_t size = request.size; + flush_requests.pop_front(); + flush_request_mutex.unlock(); + renderer->Rasterizer().FlushRegion(addr, size); + current_flush_fence.store(fence); + flush_request_mutex.lock(); + } +} + u64 GPU::GetTicks() const { // This values were reversed engineered by fincs from NVN // The gpu clock is reported in units of 385/625 nanoseconds constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - const u64 cpu_ticks = system.CoreTiming().GetTicks(); - const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); + if (Settings::values.use_fast_gpu_time.GetValue()) { + nanoseconds /= 256; + } const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; @@ -142,30 +187,13 @@ void GPU::FlushCommands() { renderer->Rasterizer().FlushCommands(); } -// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence -// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. -// So the values you see in docs might be multiplied by 4. -enum class BufferMethods { - BindObject = 0x0, - Nop = 0x2, - SemaphoreAddressHigh = 0x4, - SemaphoreAddressLow = 0x5, - SemaphoreSequence = 0x6, - SemaphoreTrigger = 0x7, - NotifyIntr = 0x8, - WrcacheFlush = 0x9, - Unk28 = 0xA, - UnkCacheFlush = 0xB, - RefCnt = 0x14, - SemaphoreAcquire = 0x1A, - SemaphoreRelease = 0x1B, - FenceValue = 0x1C, - FenceAction = 0x1D, - Unk78 = 0x1E, - Unk7c = 0x1F, - Yield = 0x20, - NonPullerMethods = 0x40, -}; +void GPU::SyncGuestHost() { + renderer->Rasterizer().SyncGuestHost(); +} + +void GPU::OnCommandListEnd() { + renderer->Rasterizer().ReleaseFences(); +} enum class GpuSemaphoreOperation { AcquireEqual = 0x1, @@ -180,16 +208,32 @@ void GPU::CallMethod(const MethodCall& method_call) { ASSERT(method_call.subchannel < bound_engines.size()); - if (ExecuteMethodOnEngine(method_call)) { + if (ExecuteMethodOnEngine(method_call.method)) { CallEngineMethod(method_call); } else { CallPullerMethod(method_call); } } -bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) { - const auto method = static_cast<BufferMethods>(method_call.method); - return method >= BufferMethods::NonPullerMethods; +void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, + u32 methods_pending) { + LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); + + ASSERT(subchannel < bound_engines.size()); + + if (ExecuteMethodOnEngine(method)) { + CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); + } else { + for (std::size_t i = 0; i < amount; i++) { + CallPullerMethod( + {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)}); + } + } +} + +bool GPU::ExecuteMethodOnEngine(u32 method) { + const auto buffer_method = static_cast<BufferMethods>(method); + return buffer_method >= BufferMethods::NonPullerMethods; } void GPU::CallPullerMethod(const MethodCall& method_call) { @@ -209,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::UnkCacheFlush: case BufferMethods::WrcacheFlush: case BufferMethods::FenceValue: + break; case BufferMethods::FenceAction: + ProcessFenceActionMethod(); + break; + case BufferMethods::WaitForInterrupt: + ProcessWaitForInterruptMethod(); break; case BufferMethods::SemaphoreTrigger: { ProcessSemaphoreTriggerMethod(); @@ -250,19 +299,46 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { switch (engine) { case EngineID::FERMI_TWOD_A: - fermi_2d->CallMethod(method_call); + fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + break; + case EngineID::MAXWELL_B: + maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + break; + case EngineID::KEPLER_COMPUTE_B: + kepler_compute->CallMethod(method_call.method, method_call.argument, + method_call.IsLastCall()); + break; + case EngineID::MAXWELL_DMA_COPY_A: + maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + break; + case EngineID::KEPLER_INLINE_TO_MEMORY_B: + kepler_memory->CallMethod(method_call.method, method_call.argument, + method_call.IsLastCall()); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented engine"); + } +} + +void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, + u32 methods_pending) { + const EngineID engine = bound_engines[subchannel]; + + switch (engine) { + case EngineID::FERMI_TWOD_A: + fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::MAXWELL_B: - maxwell_3d->CallMethod(method_call); + maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::KEPLER_COMPUTE_B: - kepler_compute->CallMethod(method_call); + kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::MAXWELL_DMA_COPY_A: - maxwell_dma->CallMethod(method_call); + maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - kepler_memory->CallMethod(method_call); + kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); break; default: UNIMPLEMENTED_MSG("Unimplemented engine"); @@ -273,7 +349,46 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { // Bind the current subchannel to the desired engine id. LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, method_call.argument); - bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); + const auto engine_id = static_cast<EngineID>(method_call.argument); + bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id); + switch (engine_id) { + case EngineID::FERMI_TWOD_A: + dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); + break; + case EngineID::MAXWELL_B: + dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); + break; + case EngineID::KEPLER_COMPUTE_B: + dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); + break; + case EngineID::MAXWELL_DMA_COPY_A: + dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); + break; + case EngineID::KEPLER_INLINE_TO_MEMORY_B: + dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id)); + } +} + +void GPU::ProcessFenceActionMethod() { + switch (regs.fence_action.op) { + case FenceOperation::Acquire: + WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); + break; + case FenceOperation::Increment: + IncrementSyncPoint(regs.fence_action.syncpoint_id); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented operation {}", + static_cast<u32>(regs.fence_action.op.Value())); + } +} + +void GPU::ProcessWaitForInterruptMethod() { + // TODO(bunnei) ImplementMe + LOG_WARNING(HW_GPU, "(STUBBED) called"); } void GPU::ProcessSemaphoreTriggerMethod() { |