From 96fd1348aea9d70cb502a94cbd0412be6edb0189 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 29 Dec 2023 09:50:04 +0100
Subject: GPU SMMU: Expand to 34 bits

---
 src/core/hle/service/nvdrv/core/nvmap.cpp          | 38 +++++++++++++++++++---
 .../service/nvdrv/devices/nvhost_nvdec_common.cpp  |  1 -
 src/video_core/gpu.cpp                             |  1 +
 src/video_core/host1x/codecs/h264.cpp              |  9 +++--
 src/video_core/host1x/codecs/vp8.cpp               |  4 +--
 src/video_core/host1x/codecs/vp9.cpp               |  6 ++--
 src/video_core/host1x/gpu_device_memory_manager.h  |  2 +-
 src/video_core/host1x/host1x.cpp                   |  4 ++-
 src/video_core/host1x/host1x.h                     | 20 ++++++++++++
 src/video_core/host1x/vic.cpp                      | 10 +++---
 src/video_core/memory_manager.cpp                  | 18 ++++++----
 src/video_core/memory_manager.h                    |  2 ++
 12 files changed, 86 insertions(+), 29 deletions(-)

(limited to 'src')
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index e4168a37c..0b2ddd980 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -80,6 +80,15 @@ void NvMap::UnmapHandle(Handle& handle_description) {
         handle_description.unmap_queue_entry.reset();
     }
 
+    // Free and unmap the handle from Host1x GMMU
+    if (handle_description.pin_virt_address) {
+        host1x.GMMU().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address),
+                            handle_description.aligned_size);
+        host1x.Allocator().Free(handle_description.pin_virt_address,
+                                static_cast<u32>(handle_description.aligned_size));
+        handle_description.pin_virt_address = 0;
+    }
+
     // Free and unmap the handle from the SMMU
     auto& smmu = host1x.MemoryManager();
     smmu.Unmap(handle_description.d_address, handle_description.aligned_size);
@@ -141,6 +150,17 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
     }
 
     std::scoped_lock lock(handle_description->mutex);
+    const auto map_low_area = [&] { // Lazily maps the handle into the Host1x GMMU (once)
+        if (handle_description->pin_virt_address == 0) {
+            auto& gmmu_allocator = host1x.Allocator(); // NOTE(review): Allocate result unchecked
+            auto& gmmu = host1x.GMMU();
+            u32 address =
+                gmmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size));
+            gmmu.Map(static_cast<GPUVAddr>(address), handle_description->d_address,
+                     handle_description->aligned_size);
+            handle_description->pin_virt_address = address; // the check above treats 0 as unmapped
+        }
+    };
     if (!handle_description->pins) {
         // If we're in the unmap queue we can just remove ourselves and return since we're already
         // mapped
@@ -152,6 +172,12 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
                 unmap_queue.erase(*handle_description->unmap_queue_entry);
                 handle_description->unmap_queue_entry.reset();
 
+                if (low_area_pin) {
+                    map_low_area();
+                    handle_description->pins++;
+                    return static_cast<DAddr>(handle_description->pin_virt_address);
+                }
+
                 handle_description->pins++;
                 return handle_description->d_address;
             }
@@ -162,10 +188,7 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
         DAddr address{};
         auto& smmu = host1x.MemoryManager();
         auto* session = core.GetSession(session_id);
-
-        auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
-                         //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
-        while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
+        while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) {
             // Free handles until the allocation succeeds
             std::scoped_lock queueLock(unmap_queue_lock);
             if (auto freeHandleDesc{unmap_queue.front()}) {
@@ -185,7 +208,14 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
                  session->smmu_id);
     }
 
+    if (low_area_pin) {
+        map_low_area();
+    }
+
     handle_description->pins++;
+    if (low_area_pin) {
+        return static_cast<DAddr>(handle_description->pin_virt_address);
+    }
     return handle_description->d_address;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 78bc5f3c4..0b6aa9993 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -95,7 +95,6 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
     offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
 
     auto& gpu = system.GPU();
-    //auto& device_memory = system.Host1x().MemoryManager();
     auto* session = core.GetSession(sessions[fd]);
 
     if (gpu.UseNvdec()) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 5f780507b..6ad3b94f8 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -88,6 +88,7 @@ struct GPU::Impl {
         renderer = std::move(renderer_);
         rasterizer = renderer->ReadRasterizer();
         host1x.MemoryManager().BindInterface(rasterizer);
+        host1x.GMMU().BindRasterizer(rasterizer);
     }
 
     /// Flush all current written commands into the host GPU for execution.
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 309a7f1d5..994591c8d 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -32,13 +32,12 @@ H264::~H264() = default;
 std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
                                        size_t* out_configuration_size, bool is_first_frame) {
     H264DecoderContext context;
-    host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
-                                     sizeof(H264DecoderContext));
+    host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
 
     const s64 frame_number = context.h264_parameter_set.frame_number.Value();
     if (!is_first_frame && frame_number != 0) {
         frame.resize_destructive(context.stream_len);
-        host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
+        host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
         *out_configuration_size = 0;
         return frame;
     }
@@ -159,8 +158,8 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
     std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
 
     *out_configuration_size = encoded_header.size();
-    host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
-                                     frame.data() + encoded_header.size(), context.stream_len);
+    host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(),
+                            context.stream_len);
 
     return frame;
 }
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
index ee6392ff9..be97e3b00 100644
--- a/src/video_core/host1x/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -14,7 +14,7 @@ VP8::~VP8() = default;
 
 std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
     VP8PictureInfo info;
-    host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
+    host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
 
     const bool is_key_frame = info.key_frame == 1u;
     const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
@@ -45,7 +45,7 @@ std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters&
         frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
     }
     const u64 bitstream_offset = state.frame_bitstream_offset;
-    host1x.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
+    host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
 
     return frame;
 }
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index 306c3d0e8..e2ae1f76d 100644
--- a/src/video_core/host1x/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -358,7 +358,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
 
 Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
     PictureInfo picture_info;
-    host1x.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
+    host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
     Vp9PictureInfo vp9_info = picture_info.Convert();
 
     InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
@@ -373,7 +373,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters&
 
 void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
     EntropyProbs entropy;
-    host1x.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
+    host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); // offset is a GMMU address
     entropy.Convert(dst);
 }
 
@@ -383,7 +383,6 @@ Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters
         // gpu.SyncGuestHost(); epic, why?
         current_frame.info = GetVp9PictureInfo(state);
         current_frame.bit_stream.resize(current_frame.info.bitstream_size);
-        host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
-                                         current_frame.bit_stream.data(),
-                                         current_frame.info.bitstream_size);
+        host1x.GMMU().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
+                                current_frame.info.bitstream_size);
     }
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h
index a406ce965..6c7858848 100644
--- a/src/video_core/host1x/gpu_device_memory_manager.h
+++ b/src/video_core/host1x/gpu_device_memory_manager.h
@@ -15,7 +15,7 @@ struct MaxwellDeviceMethods;
 
 struct MaxwellDeviceTraits {
     static constexpr bool supports_pinning = false;
-    static constexpr size_t device_virtual_bits = 32;
+    static constexpr size_t device_virtual_bits = 34;
     using DeviceInterface = typename VideoCore::RasterizerInterface;
     using DeviceMethods = typename MaxwellDeviceMethods;
 };
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp
index d05bcaf26..b7f9a08cf 100644
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@@ -9,7 +9,9 @@ namespace Tegra {
 namespace Host1x {
 
 Host1x::Host1x(Core::System& system_)
-    : system{system_}, syncpoint_manager{}, memory_manager(system.DeviceMemory()) {}
+    : system{system_}, syncpoint_manager{}, // gmmu_manager is handed memory_manager by reference
+      memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
+      allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
 
 } // namespace Host1x
 
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
index 18f7389f6..13c37e6b4 100644
--- a/src/video_core/host1x/host1x.h
+++ b/src/video_core/host1x/host1x.h
@@ -5,8 +5,10 @@
 
 #include "common/common_types.h"
 
+#include "common/address_space.h"
 #include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/host1x/syncpoint_manager.h"
+#include "video_core/memory_manager.h"
 
 namespace Core {
 class System;
@@ -36,10 +38,28 @@ public:
         return memory_manager;
     }
 
+    Tegra::MemoryManager& GMMU() { // Host1x GMMU; low-area pins are mapped through it
+        return gmmu_manager;
+    }
+
+    const Tegra::MemoryManager& GMMU() const { // Read-only access to the same GMMU
+        return gmmu_manager;
+    }
+
+    Common::FlatAllocator<u32, 0, 32>& Allocator() { // Hands out addresses for GMMU mappings
+        return *allocator;
+    }
+
+    const Common::FlatAllocator<u32, 0, 32>& Allocator() const { // Read-only allocator access
+        return *allocator;
+    }
+
 private:
     Core::System& system;
     SyncpointManager syncpoint_manager;
     Tegra::MaxwellDeviceMemoryManager memory_manager;
+    Tegra::MemoryManager gmmu_manager;
+    std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
 };
 
 } // namespace Host1x
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index 2a5eba415..1826211a1 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -81,7 +81,7 @@ void Vic::Execute() {
         LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
         return;
     }
-    const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)};
+    const VicConfig config{host1x.GMMU().Read<u64>(config_struct_address + 0x20)};
     auto frame = nvdec_processor->GetFrame();
     if (!frame) {
         return;
@@ -162,11 +162,11 @@ void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
         Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height,
                                 block_height, 0, width * 4);
 
-        host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
+        host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
     } else {
         // send pitch linear frame
         const size_t linear_size = width * height * 4;
-        host1x.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
-                                          linear_size);
+        host1x.GMMU().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
+                                 linear_size);
     }
 }
@@ -193,7 +193,6 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
         const std::size_t dst = y * aligned_width;
         std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width);
     }
-    host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
-                                      luma_buffer.size());
+    host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), luma_buffer.size());
 
     // Chroma
@@ -233,7 +232,7 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
         ASSERT(false);
         break;
     }
-    host1x.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
-                                      chroma_buffer.size());
+    host1x.GMMU().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
+                             chroma_buffer.size());
 }
 
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 82f7a1c3b..ac1417fbc 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -16,18 +16,17 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
 
-
 namespace Tegra {
 using Tegra::Memory::GuestMemoryFlags;
 
 std::atomic<size_t> MemoryManager::unique_identifier_generator{};
 
-MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
-                             u64 page_bits_)
-    : system{system_}, memory{system.Host1x().MemoryManager()},
-      address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
-      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
-                                           page_bits != big_page_bits ? page_bits : 0},
+MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
+                             u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_)
+    : system{system_}, memory{memory_}, address_space_bits{address_space_bits_},
+      page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{},
+      page_table{address_space_bits, address_space_bits + page_bits - 38,
+                 page_bits != big_page_bits ? page_bits : 0},
       kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
                                       1, std::memory_order_acq_rel)},
       accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
@@ -49,6 +48,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
     entries.resize(page_table_size / 32, 0);
 }
 
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
+                             u64 page_bits_)
+    : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_,
+                    page_bits_) {} // Delegates, passing the Host1x memory manager as memory_
+
 MemoryManager::~MemoryManager() = default;
 
 template <bool is_big_page>
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index e2912a73f..6b2cd7efb 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -38,6 +38,11 @@ class MemoryManager final {
 public:
     explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
                            u64 big_page_bits_ = 16, u64 page_bits_ = 12);
+    /// Same as the constructor above, but backed by the caller-supplied device memory manager
+    /// instead of the one looked up through system_.Host1x().
+    explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
+                           u64 address_space_bits_ = 40, u64 big_page_bits_ = 16,
+                           u64 page_bits_ = 12);
     ~MemoryManager();
 
     size_t GetID() const {
-- 
cgit v1.2.3