From da440da9f54cc860f3c69da685a415d5ec9d7b64 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 28 Jun 2023 19:32:50 +0200 Subject: Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU --- src/core/memory.cpp | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) (limited to 'src/core/memory.cpp') diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 60b246bdd..257406f09 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "common/assert.h" #include "common/atomic_ops.h" @@ -679,7 +680,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, GetInteger(vaddr), static_cast(data)); }, - [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); + [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); if (ptr) { std::memcpy(ptr, &data, sizeof(T)); } @@ -693,7 +694,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, GetInteger(vaddr), static_cast(data)); }, - [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); + [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); if (ptr) { const auto volatile_pointer = reinterpret_cast(ptr); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); @@ -708,7 +709,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", GetInteger(vaddr), static_cast(data[1]), static_cast(data[0])); }, - [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); }); + [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); if (ptr) { const auto volatile_pointer = reinterpret_cast(ptr); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); @@ -718,7 +719,7 @@ struct Memory::Impl { void HandleRasterizerDownload(VAddr address, size_t size) { const size_t core = system.GetCurrentHostThreadID(); - auto& current_area = rasterizer_areas[core]; + auto& current_area = rasterizer_read_areas[core]; const VAddr end_address = address + size; if (current_area.start_address <= address && end_address <= current_area.end_address) [[likely]] { @@ -727,9 +728,31 @@ struct Memory::Impl { current_area = system.GPU().OnCPURead(address, size); } - Common::PageTable* current_page_table = nullptr; - std::array rasterizer_areas{}; + void HandleRasterizerWrite(VAddr address, size_t size) { + const size_t core = system.GetCurrentHostThreadID(); + auto& current_area = rasterizer_write_areas[core]; + VAddr subaddress = address >> YUZU_PAGEBITS; + bool do_collection = current_area.last_address == subaddress; + if (!do_collection) [[unlikely]] { + do_collection = system.GPU().OnCPUWrite(address, size); + if (!do_collection) { + return; + } + current_area.last_address = subaddress; + } + gpu_dirty_managers[core].Collect(address, size); + } + + struct GPUDirtyState { + VAddr last_address; + }; + Core::System& system; + Common::PageTable* current_page_table = nullptr; + std::array + rasterizer_read_areas{}; + std::array rasterizer_write_areas{}; + std::span gpu_dirty_managers; }; Memory::Memory(Core::System& system_) : system{system_} { @@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); } +void Memory::SetGPUDirtyManagers(std::span managers) { + impl->gpu_dirty_managers = managers; +} + Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); } -- cgit v1.2.3