author: bunnei <bunneidev@gmail.com> 2019-04-22 23:09:00 +0200
committer: GitHub <noreply@github.com> 2019-04-22 23:09:00 +0200
commit: 68b707711a521f7987e1abcd1661b4575e3d270d (patch)
tree: af22e59d157b8b7a3e6260ca820aba3613197518
parent: Merge pull request #2400 from FernandoS27/corret-kepler-mem (diff)
parent: make ReadBlockunsafe and WriteBlockunsafe, ignore invalid pages. (diff)
download: yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar
yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.gz
yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.bz2
yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.lz
yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.xz
yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.zst
yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.zip
5 files changed, 99 insertions, 15 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 046d047cb..6674d9405 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -57,8 +57,8 @@ bool DmaPusher::Step() {
 
     // Push buffer non-empty, read a word
     command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
-                                  command_list_header.size * sizeof(u32));
+    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                        command_list_header.size * sizeof(u32));
 
     for (const CommandHeader& command_header : command_headers) {
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b198793bc..9780417f2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -418,7 +418,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
 
     Texture::TICEntry tic_entry;
-    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
 
     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -439,7 +439,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
     const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
 
     Texture::TSCEntry tsc_entry;
-    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;
 }
 
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 0f4e820aa..6c98c6701 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -199,7 +199,15 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
     return {};
 }
 
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
+bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
+    const GPUVAddr end = start + size;
+    const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
+    const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
+    const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
+    return range == size;
+}
+
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
     std::size_t remaining_size{size};
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
@@ -226,7 +234,30 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t
     }
 }
 
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
+                                    const std::size_t size) const {
+    std::size_t remaining_size{size};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+        const u8* page_pointer = page_table.pointers[page_index];
+        if (page_pointer) {
+            const u8* src_ptr{page_pointer + page_offset};
+            std::memcpy(dest_buffer, src_ptr, copy_amount);
+        } else {
+            std::memset(dest_buffer, 0, copy_amount);
+        }
+        page_index++;
+        page_offset = 0;
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+        remaining_size -= copy_amount;
+    }
+}
+
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
     std::size_t remaining_size{size};
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
@@ -253,7 +284,28 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::
     }
 }
 
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
+                                     const std::size_t size) {
+    std::size_t remaining_size{size};
+    std::size_t page_index{dest_addr >> page_bits};
+    std::size_t page_offset{dest_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+        u8* page_pointer = page_table.pointers[page_index];
+        if (page_pointer) {
+            u8* dest_ptr{page_pointer + page_offset};
+            std::memcpy(dest_ptr, src_buffer, copy_amount);
+        }
+        page_index++;
+        page_offset = 0;
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+        remaining_size -= copy_amount;
+    }
+}
+
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
     std::size_t remaining_size{size};
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
@@ -281,6 +333,12 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t
     }
 }
 
+void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+    std::vector<u8> tmp_buffer(size);
+    ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
+    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
+}
+
 void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
                              VAddr backing_addr) {
     LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 647cbf93a..e4f0c4bd6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -65,9 +65,32 @@ public:
     u8* GetPointer(GPUVAddr addr);
     const u8* GetPointer(GPUVAddr addr) const;
 
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+    // Returns true if the block is continous in host memory, false otherwise
+    bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
+
+    /**
+     * ReadBlock and WriteBlock are full read and write operations over virtual
+     * GPU Memory. It's important to use these when GPU memory may not be continous
+     * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
+     * Flushes and Invalidations, respectively to each operation.
+     */
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+
+    /**
+     * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
+     * WriteBlock respectively. In this versions, no flushing or invalidation is actually
+     * done and their performance is similar to a memcpy. This functions can be used
+     * on either of this 2 scenarios instead of their safe counterpart:
+     * - Memory which is sure to never be represented in the Host GPU.
+     * - Memory Managed by a Cache Manager. Example: Texture Flushing should use
+     * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
+     * being flushed.
+     */
+    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
 
 private:
     using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 99f67494c..43f2906a8 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -38,13 +38,15 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
 }
 
 /// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(const u8* host_ptr) {
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+                          const u8* host_ptr) {
     ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
     ASSERT_OR_EXECUTE(host_ptr != nullptr, {
         std::fill(program_code.begin(), program_code.end(), 0);
         return program_code;
     });
-    std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
+    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
+                                   program_code.size() * sizeof(u64));
     return program_code;
 }
 
@@ -497,11 +499,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     if (!shader) {
         // No shader found - create a new one
-        ProgramCode program_code{GetShaderCode(host_ptr)};
+        ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
         ProgramCode program_code_b;
         if (program == Maxwell::ShaderProgram::VertexA) {
-            program_code_b = GetShaderCode(
-                memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+            const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)};
+            program_code_b = GetShaderCode(memory_manager, program_addr_b,
+                                           memory_manager.GetPointer(program_addr_b));
         }
         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
         const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
author	bunnei <bunneidev@gmail.com>	2019-04-22 23:09:00 +0200
committer	GitHub <noreply@github.com>	2019-04-22 23:09:00 +0200
commit	68b707711a521f7987e1abcd1661b4575e3d270d (patch)
tree	af22e59d157b8b7a3e6260ca820aba3613197518
parent	Merge pull request #2400 from FernandoS27/corret-kepler-mem (diff)
parent	make ReadBlockunsafe and WriteBlockunsafe, ignore invalid pages. (diff)
download	yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.gz yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.bz2 yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.lz yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.xz yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.tar.zst yuzu-68b707711a521f7987e1abcd1661b4575e3d270d.zip