From a91d3fc6397560fc6294a24faeed73d45abd1753 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 22 Apr 2019 18:50:56 -0400 Subject: Revamp Kepler Memory to use a subegine to manage uploads --- src/video_core/engines/engine_upload.cpp | 44 +++++++++++++++++++ src/video_core/engines/engine_upload.h | 74 ++++++++++++++++++++++++++++++++ src/video_core/engines/kepler_memory.cpp | 45 ++++--------------- src/video_core/engines/kepler_memory.h | 60 +++----------------------- 4 files changed, 131 insertions(+), 92 deletions(-) create mode 100644 src/video_core/engines/engine_upload.cpp create mode 100644 src/video_core/engines/engine_upload.h (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp new file mode 100644 index 000000000..201f8e273 --- /dev/null +++ b/src/video_core/engines/engine_upload.cpp @@ -0,0 +1,44 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/engines/engine_upload.h" +#include "video_core/memory_manager.h" +#include "video_core/textures/decoders.h" + +namespace Tegra::Engines::Upload { + +void State::ProcessExec(const bool is_linear) { + write_offset = 0; + copy_size = regs.line_length_in * regs.line_count; + inner_buffer.resize(copy_size); + linear = is_linear; +} + +void State::ProcessData(const u32 data, const bool is_last_call) { + const u32 sub_copy_size = std::min(4U, copy_size - write_offset); + std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); + write_offset += sub_copy_size; + if (is_last_call) { + const GPUVAddr address{regs.dest.Address()}; + if (linear) { + memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); + } else { + UNIMPLEMENTED_IF(regs.dest.z != 0); + UNIMPLEMENTED_IF(regs.dest.depth != 1); + UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); + UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); + const std::size_t dst_size = Tegra::Texture::CalculateSize( + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + std::vector tmp_buffer(dst_size); + memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); + Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, + regs.dest.y, regs.dest.BlockHeight(), copy_size, + inner_buffer.data(), tmp_buffer.data()); + memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); + } + } +} + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h new file mode 100644 index 000000000..3a817140a --- /dev/null +++ b/src/video_core/engines/engine_upload.h @@ -0,0 +1,74 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines::Upload { + +struct Data { + u32 line_length_in; + u32 line_count; + + struct { + u32 address_high; + u32 address_low; + u32 pitch; + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; + u32 width; + u32 height; + u32 depth; + u32 z; + u32 x; + u32 y; + + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | address_low); + } + + u32 BlockWidth() const { + return 1U << block_width.Value(); + } + + u32 BlockHeight() const { + return 1U << block_height.Value(); + } + + u32 BlockDepth() const { + return 1U << block_depth.Value(); + } + } dest; +}; + +class State { +public: + State(MemoryManager& memory_manager, Data& regs) : memory_manager(memory_manager), regs(regs) {} + ~State() = default; + + void ProcessExec(const bool is_linear); + void ProcessData(const u32 data, const bool is_last_call); + +private: + u32 write_offset = 0; + u32 copy_size = 0; + std::vector inner_buffer; + bool linear; + Data& regs; + MemoryManager& memory_manager; +}; + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 7387886a3..71fa499d3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -14,9 +14,8 @@ namespace Tegra::Engines { -KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} +KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) + : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} KeplerMemory::~KeplerMemory() = default; @@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { switch (method_call.method) { case KEPLERMEMORY_REG_INDEX(exec): { - ProcessExec(); + upload_state.ProcessExec(regs.exec.linear != 0); break; } case KEPLERMEMORY_REG_INDEX(data): { - ProcessData(method_call.argument, method_call.IsLastCall()); + bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } break; } } } -void KeplerMemory::ProcessExec() { - state.write_offset = 0; - state.copy_size = regs.line_length_in * regs.line_count; - state.inner_buffer.resize(state.copy_size); -} - -void KeplerMemory::ProcessData(u32 data, bool is_last_call) { - const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); - std::memcpy(&state.inner_buffer[state.write_offset], ®s.data, sub_copy_size); - state.write_offset += sub_copy_size; - if (is_last_call) { - const GPUVAddr address{regs.dest.Address()}; - if (regs.exec.linear != 0) { - memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); - } else { - UNIMPLEMENTED_IF(regs.dest.z != 0); - UNIMPLEMENTED_IF(regs.dest.depth != 1); - UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); - UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); - const std::size_t dst_size = Tegra::Texture::CalculateSize( - true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); - std::vector tmp_buffer(dst_size); - memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); - Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, - regs.dest.y, regs.dest.BlockHeight(), state.copy_size, - state.inner_buffer.data(), tmp_buffer.data()); - memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); - } - system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); - } -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5f892ddad..c6b738eb9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -10,6 +10,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" namespace Core { @@ -20,10 +21,6 @@ namespace Tegra { class MemoryManager; } -namespace VideoCore { -class RasterizerInterface; -} - namespace Tegra::Engines { #define KEPLERMEMORY_REG_INDEX(field_name) \ @@ -31,8 +28,7 @@ namespace Tegra::Engines { class KeplerMemory final { public: - KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); + KeplerMemory(Core::System& system, MemoryManager& memory_manager); ~KeplerMemory(); /// Write the value to the register identified by method. @@ -45,42 +41,7 @@ public: struct { INSERT_PADDING_WORDS(0x60); - u32 line_length_in; - u32 line_count; - - struct { - u32 address_high; - u32 address_low; - u32 pitch; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - }; - u32 width; - u32 height; - u32 depth; - u32 z; - u32 x; - u32 y; - - GPUVAddr Address() const { - return static_cast((static_cast(address_high) << 32) | - address_low); - } - - u32 BlockWidth() const { - return 1U << block_width.Value(); - } - - u32 BlockHeight() const { - return 1U << block_height.Value(); - } - - u32 BlockDepth() const { - return 1U << block_depth.Value(); - } - } dest; + Upload::Data upload; struct { union { @@ -96,28 +57,17 @@ public: }; } regs{}; - struct { - u32 write_offset = 0; - u32 copy_size = 0; - std::vector inner_buffer; - } state{}; - private: Core::System& system; - VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; - - void ProcessExec(); - void ProcessData(u32 data, bool is_last_call); + Upload::State upload_state; }; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(line_length_in, 0x60); -ASSERT_REG_POSITION(line_count, 0x61); -ASSERT_REG_POSITION(dest, 0x62); +ASSERT_REG_POSITION(upload, 0x60); ASSERT_REG_POSITION(exec, 0x6C); ASSERT_REG_POSITION(data, 0x6D); #undef ASSERT_REG_POSITION -- cgit v1.2.3 From e4ff140b99339589d87836f865fc437719adbbe9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 22 Apr 2019 19:05:43 -0400 Subject: Introduce skeleton of the GPU Compute Engine. --- src/video_core/engines/kepler_compute.cpp | 37 ++++++- src/video_core/engines/kepler_compute.h | 171 +++++++++++++++++++++++++++++- 2 files changed, 201 insertions(+), 7 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index b1d950460..28f1f6a7d 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -4,12 +4,21 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" +#include "video_core/textures/decoders.h" namespace Tegra::Engines { -KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} +KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager) + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ + memory_manager, + regs.upload} {} KeplerCompute::~KeplerCompute() = default; @@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { regs.reg_array[method_call.method] = method_call.argument; switch (method_call.method) { + case KEPLER_COMPUTE_REG_INDEX(exec_upload): { + upload_state.ProcessExec(regs.exec_upload.linear != 0); + break; + } + case KEPLER_COMPUTE_REG_INDEX(data_upload): { + bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } + break; + } case KEPLER_COMPUTE_REG_INDEX(launch): - // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA - // kernels) - UNREACHABLE_MSG("Compute shaders are not implemented"); + ProcessLaunch(); break; default: break; } } +void KeplerCompute::ProcessLaunch() { + + const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); + memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, + LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); + + const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; + LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index fb6cdf432..ab2781b4b 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -6,14 +6,25 @@ #include #include +#include +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" +namespace Core { +class System; +} + namespace Tegra { class MemoryManager; } +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ @@ -21,7 +32,8 @@ namespace Tegra::Engines { class KeplerCompute final { public: - explicit KeplerCompute(MemoryManager& memory_manager); + explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager); ~KeplerCompute(); static constexpr std::size_t NumConstBuffers = 8; @@ -31,30 +43,183 @@ public: union { struct { - INSERT_PADDING_WORDS(0xAF); + INSERT_PADDING_WORDS(0x60); + + Upload::Data upload; + + struct { + union { + BitField<0, 1, u32> linear; + }; + } exec_upload; + + u32 data_upload; + + INSERT_PADDING_WORDS(0x3F); + + struct { + u32 address; + GPUVAddr Address() const { + return static_cast((static_cast(address) << 8)); + } + } launch_desc_loc; + + INSERT_PADDING_WORDS(0x1); u32 launch; - INSERT_PADDING_WORDS(0xC48); + INSERT_PADDING_WORDS(0x4A7); + + struct { + u32 address_high; + u32 address_low; + u32 limit; + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); + } + } tsc; + + INSERT_PADDING_WORDS(0x3); + + struct { + u32 address_high; + u32 address_low; + u32 limit; + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); + } + } tic; + + INSERT_PADDING_WORDS(0x22); + + struct { + u32 address_high; + u32 address_low; + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); + } + } code_loc; + + INSERT_PADDING_WORDS(0x3FE); + + u32 texture_const_buffer_index; + + INSERT_PADDING_WORDS(0x374); }; std::array reg_array; }; } regs{}; + + struct LaunchParams { + static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; + + INSERT_PADDING_WORDS(0x8); + + u32 program_start; + + INSERT_PADDING_WORDS(0x2); + + BitField<30, 1, u32> linked_tsc; + + BitField<0, 31, u32> grid_dim_x; + + union { + BitField<0, 16, u32> grid_dim_y; + BitField<16, 16, u32> grid_dim_z; + }; + + INSERT_PADDING_WORDS(0x3); + + BitField<0, 16, u32> shared_alloc; + + BitField<0, 31, u32> block_dim_x; + + union { + BitField<0, 16, u32> block_dim_y; + BitField<16, 16, u32> block_dim_z; + }; + + union { + BitField<0, 8, u32> const_buffer_enable_mask; + BitField<29, 2, u32> cache_layout; + } memory_config; + + INSERT_PADDING_WORDS(0x8); + + struct { + u32 address_low; + union { + BitField<0, 8, u32> address_high; + BitField<15, 17, u32> size; + }; + GPUVAddr Address() const { + return static_cast((static_cast(address_high.Value()) << 32) | + address_low); + } + } const_buffer_config[8]; + + union { + BitField<0, 20, u32> local_pos_alloc; + BitField<27, 5, u32> barrier_alloc; + }; + + union { + BitField<0, 20, u32> local_neg_alloc; + BitField<24, 5, u32> gpr_alloc; + }; + + INSERT_PADDING_WORDS(0x11); + } launch_description; + + struct { + u32 write_offset = 0; + u32 copy_size = 0; + std::vector inner_buffer; + } state{}; + static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "KeplerCompute Regs has wrong size"); + static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32), + "KeplerCompute LaunchParams has wrong size"); + /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); private: + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; + Upload::State upload_state; + + void ProcessLaunch(); }; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \ + static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D); ASSERT_REG_POSITION(launch, 0xAF); +ASSERT_REG_POSITION(tsc, 0x557); +ASSERT_REG_POSITION(tic, 0x55D); +ASSERT_REG_POSITION(code_loc, 0x582); +ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); +ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); +ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); +ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); +ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); +ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); +ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); #undef ASSERT_REG_POSITION -- cgit v1.2.3 From 701ce1c9d03c6c6a70965cbba3e961ab06abf49c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 22 Apr 2019 19:27:36 -0400 Subject: Implement Maxwell3D Data Upload --- src/video_core/engines/maxwell_3d.cpp | 16 ++++++++++++++-- src/video_core/engines/maxwell_3d.h | 19 ++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9780417f2..78810dbbb 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ - *this} { + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, + macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { InitializeRegisterDefaults(); } @@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessSyncPoint(); break; } + case MAXWELL3D_REG_INDEX(exec_upload): { + upload_state.ProcessExec(regs.exec_upload.linear != 0); + break; + } + case MAXWELL3D_REG_INDEX(data_upload): { + bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + dirty_flags.OnMemoryWrite(); + } + break; + } default: break; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cc2424d38..47fe1f137 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -14,6 +14,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" #include "video_core/textures/texture.h" @@ -579,7 +580,18 @@ public: u32 bind; } macros; - INSERT_PADDING_WORDS(0x69); + INSERT_PADDING_WORDS(0x17); + + Upload::Data upload; + struct { + union { + BitField<0, 1, u32> linear; + }; + } exec_upload; + + u32 data_upload; + + INSERT_PADDING_WORDS(0x44); struct { union { @@ -1175,6 +1187,8 @@ private: /// Interpreter for the macro codes uploaded to the GPU. MacroInterpreter macro_interpreter; + Upload::State upload_state; + /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1218,6 +1232,9 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(macros, 0x45); +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D); ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(tfb_enabled, 0x1D1); ASSERT_REG_POSITION(rt, 0x200); -- cgit v1.2.3 From 021d28c9b81d86d52b989b4cc6828f33c6a63c9b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 23 Apr 2019 08:02:24 -0400 Subject: Corrections and styling --- src/video_core/engines/engine_upload.cpp | 5 ++++- src/video_core/engines/engine_upload.h | 4 ++-- src/video_core/engines/kepler_compute.cpp | 2 +- src/video_core/engines/kepler_memory.cpp | 2 +- src/video_core/engines/maxwell_3d.cpp | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 201f8e273..663d5517a 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -9,11 +9,14 @@ namespace Tegra::Engines::Upload { +State::State(MemoryManager& memory_manager, Data& regs) + : memory_manager(memory_manager), regs(regs) {} + void State::ProcessExec(const bool is_linear) { write_offset = 0; copy_size = regs.line_length_in * regs.line_count; inner_buffer.resize(copy_size); - linear = is_linear; + this->is_linear = is_linear; } void State::ProcessData(const u32 data, const bool is_last_call) { diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 3a817140a..431f56030 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -56,7 +56,7 @@ struct Data { class State { public: - State(MemoryManager& memory_manager, Data& regs) : memory_manager(memory_manager), regs(regs) {} + State(MemoryManager& memory_manager, Data& regs); ~State() = default; void ProcessExec(const bool is_linear); @@ -66,7 +66,7 @@ private: u32 write_offset = 0; u32 copy_size = 0; std::vector inner_buffer; - bool linear; + bool is_linear; Data& regs; MemoryManager& memory_manager; }; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 28f1f6a7d..7404a8163 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -34,7 +34,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { break; } case KEPLER_COMPUTE_REG_INDEX(data_upload): { - bool is_last_call = method_call.IsLastCall(); + const bool is_last_call = method_call.IsLastCall(); upload_state.ProcessData(method_call.argument, is_last_call); if (is_last_call) { system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 71fa499d3..0561f676c 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -31,7 +31,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { break; } case KEPLERMEMORY_REG_INDEX(data): { - bool is_last_call = method_call.IsLastCall(); + const bool is_last_call = method_call.IsLastCall(); upload_state.ProcessData(method_call.argument, is_last_call); if (is_last_call) { system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 78810dbbb..d7b586db9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -258,7 +258,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { break; } case MAXWELL3D_REG_INDEX(data_upload): { - bool is_last_call = method_call.IsLastCall(); + const bool is_last_call = method_call.IsLastCall(); upload_state.ProcessData(method_call.argument, is_last_call); if (is_last_call) { dirty_flags.OnMemoryWrite(); -- cgit v1.2.3 From e140e2ebc65a7ad0a25ba979141e80e9376f5eff Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 23 Apr 2019 08:44:52 -0400 Subject: Add Documentation Headers to all the GPU Engines --- src/video_core/engines/fermi_2d.h | 6 ++++++ src/video_core/engines/kepler_compute.h | 6 ++++++ src/video_core/engines/kepler_memory.h | 6 ++++++ src/video_core/engines/maxwell_3d.h | 6 ++++++ src/video_core/engines/maxwell_dma.h | 5 +++++ 5 files changed, 29 insertions(+) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 2e51b7f13..45f59a4d9 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -21,6 +21,12 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as G80_2D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h + */ + #define FERMI2D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index ab2781b4b..50f318848 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -27,6 +27,12 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as GK104_Compute. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h + */ + #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index c6b738eb9..473bff20a 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -23,6 +23,12 @@ class MemoryManager; namespace Tegra::Engines { +/** + * This Engine is known as P2MF. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h + */ + #define KEPLERMEMORY_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 47fe1f137..eae427412 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -33,6 +33,12 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as GF100_3D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h + */ + #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c6b649842..35b25e6b9 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -25,6 +25,11 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as GK104_Copy. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml + */ + class MaxwellDMA final { public: explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, -- cgit v1.2.3 From f1e5314f1a57b09bd45e0db3ca91449e761c431a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 23 Apr 2019 11:21:00 -0400 Subject: Add Swizzle Parameters to the DMA engine --- src/video_core/engines/engine_upload.cpp | 2 +- src/video_core/engines/maxwell_dma.h | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 663d5517a..867457f25 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -25,7 +25,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) { write_offset += sub_copy_size; if (is_last_call) { const GPUVAddr address{regs.dest.Address()}; - if (linear) { + if (is_linear) { memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); } else { UNIMPLEMENTED_IF(regs.dest.z != 0); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 35b25e6b9..0b2e26199 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -68,6 +68,16 @@ public: static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); + enum class ComponentMode : u32 { + SRC0 = 0, + SRC1 = 1, + SRC2 = 2, + SRC3 = 3, + CONST0 = 4, + CONST1 = 5, + ZERO = 6, + }; + enum class CopyMode : u32 { None = 0, Unk1 = 1, @@ -133,7 +143,19 @@ public: u32 x_count; u32 y_count; - INSERT_PADDING_WORDS(0xBB); + INSERT_PADDING_WORDS(0xB8); + + u32 const0; + u32 const1; + union { + BitField<0, 4, ComponentMode> component0; + BitField<4, 4, ComponentMode> component1; + BitField<8, 4, ComponentMode> component2; + BitField<12, 4, ComponentMode> component3; + BitField<16, 2, u32> component_size; + BitField<20, 3, u32> src_num_components; + BitField<24, 3, u32> dst_num_components; + } swizzle_config; Parameters dst_params; @@ -170,6 +192,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104); ASSERT_REG_POSITION(dst_pitch, 0x105); ASSERT_REG_POSITION(x_count, 0x106); ASSERT_REG_POSITION(y_count, 0x107); +ASSERT_REG_POSITION(const0, 0x1C0); +ASSERT_REG_POSITION(const1, 0x1C1); +ASSERT_REG_POSITION(swizzle_config, 0x1C2); ASSERT_REG_POSITION(dst_params, 0x1C3); ASSERT_REG_POSITION(src_params, 0x1CA); -- cgit v1.2.3 From b3118ee316863e1f4a35548f69bc1194bb740627 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 23 Apr 2019 12:41:55 -0400 Subject: Fixes and Corrections to DMA Engine --- src/video_core/engines/maxwell_dma.cpp | 83 +++++++++++++++++++--------------- src/video_core/engines/maxwell_dma.h | 11 +++++ 2 files changed, 57 insertions(+), 37 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2426d0067..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() { ASSERT(regs.exec.enable_2d == 1); - const std::size_t copy_size = regs.x_count * regs.y_count; + if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { + ASSERT(regs.src_params.size_z == 1); + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; + const std::size_t src_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, + regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); - auto source_ptr{memory_manager.GetPointer(source)}; - auto dst_ptr{memory_manager.GetPointer(dest)}; + const std::size_t dst_size = regs.dst_pitch * regs.y_count; - if (!source_ptr) { - LOG_ERROR(HW_GPU, "source_ptr is invalid"); - return; - } + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } - if (!dst_ptr) { - LOG_ERROR(HW_GPU, "dst_ptr is invalid"); - return; - } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { - // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated - // copying. - rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); + memory_manager.ReadBlock(source, read_buffer.data(), src_size); + memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - // We have to invalidate the destination region to evict any outdated surfaces from the - // cache. We do this before actually writing the new data because the destination address - // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); - }; + Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, + regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), + write_buffer.data(), regs.src_params.BlockHeight(), + regs.src_params.pos_x, regs.src_params.pos_y); - if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { - ASSERT(regs.src_params.size_z == 1); - // If the input is tiled and the output is linear, deswizzle the input and copy it over. + memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); + } else { + ASSERT(regs.dst_params.BlockDepth() == 1); - const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; + const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; - FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, - copy_size * src_bytes_per_pixel); + const std::size_t dst_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, + regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); - Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, - regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, - regs.src_params.BlockHeight(), regs.src_params.pos_x, - regs.src_params.pos_y); - } else { - ASSERT(regs.dst_params.size_z == 1); - ASSERT(regs.src_pitch == regs.x_count); + const std::size_t dst_layer_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, + regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); - const u32 src_bpp = regs.src_pitch / regs.x_count; + const std::size_t src_size = regs.src_pitch * regs.y_count; - FlushAndInvalidate(regs.src_pitch * regs.y_count, - regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } + + memory_manager.ReadBlock(source, read_buffer.data(), src_size); + memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); // If the input is linear and the output is tiled, swizzle the input and copy it over. Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, - src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); + src_bytes_per_pixel, + write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, + read_buffer.data(), regs.dst_params.BlockHeight()); + + memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 0b2e26199..8eab1332e 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -6,6 +6,7 @@ #include #include +#include #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -155,6 +156,13 @@ public: BitField<16, 2, u32> component_size; BitField<20, 3, u32> src_num_components; BitField<24, 3, u32> dst_num_components; + + u32 SrcBytePerPixel() const { + return src_num_components.Value() * component_size.Value(); + } + u32 DstBytePerPixel() const { + return dst_num_components.Value() * component_size.Value(); + } } swizzle_config; Parameters dst_params; @@ -176,6 +184,9 @@ private: MemoryManager& memory_manager; + std::vector read_buffer; + std::vector write_buffer; + /// Performs the copy from the source buffer to the destination buffer as configured in the /// registers. void HandleCopy(); -- cgit v1.2.3 From e64c41efe88e8f88014fef912b06b71b3df17e85 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 25 Apr 2019 12:57:10 -0400 Subject: Refactors and name corrections. --- src/video_core/engines/engine_upload.cpp | 39 ++++++++++++++++---------------- src/video_core/engines/engine_upload.h | 9 ++++---- src/video_core/engines/kepler_compute.h | 4 +--- src/video_core/engines/kepler_memory.h | 2 +- src/video_core/engines/maxwell_3d.h | 2 +- src/video_core/engines/maxwell_dma.h | 14 ++++++------ 6 files changed, 35 insertions(+), 35 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 867457f25..f8aa4ff55 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -9,7 +9,7 @@ namespace Tegra::Engines::Upload { -State::State(MemoryManager& memory_manager, Data& regs) +State::State(MemoryManager& memory_manager, Registers& regs) : memory_manager(memory_manager), regs(regs) {} void State::ProcessExec(const bool is_linear) { @@ -23,24 +23,25 @@ void State::ProcessData(const u32 data, const bool is_last_call) { const u32 sub_copy_size = std::min(4U, copy_size - write_offset); std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); write_offset += sub_copy_size; - if (is_last_call) { - const GPUVAddr address{regs.dest.Address()}; - if (is_linear) { - memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); - } else { - UNIMPLEMENTED_IF(regs.dest.z != 0); - UNIMPLEMENTED_IF(regs.dest.depth != 1); - UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); - UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); - const std::size_t dst_size = Tegra::Texture::CalculateSize( - true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); - std::vector tmp_buffer(dst_size); - memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); - Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, - regs.dest.y, regs.dest.BlockHeight(), copy_size, - inner_buffer.data(), tmp_buffer.data()); - memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); - } + if (!is_last_call) { + return; + } + const GPUVAddr address{regs.dest.Address()}; + if (is_linear) { + memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); + } else { + UNIMPLEMENTED_IF(regs.dest.z != 0); + UNIMPLEMENTED_IF(regs.dest.depth != 1); + UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); + UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); + const std::size_t dst_size = Tegra::Texture::CalculateSize( + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + tmp_buffer.resize(dst_size); + memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); + Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, + regs.dest.BlockHeight(), copy_size, inner_buffer.data(), + tmp_buffer.data()); + memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 431f56030..9c6e0d21c 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -16,7 +16,7 @@ class MemoryManager; namespace Tegra::Engines::Upload { -struct Data { +struct Registers { u32 line_length_in; u32 line_count; @@ -56,7 +56,7 @@ struct Data { class State { public: - State(MemoryManager& memory_manager, Data& regs); + State(MemoryManager& memory_manager, Registers& regs); ~State() = default; void ProcessExec(const bool is_linear); @@ -66,8 +66,9 @@ private: u32 write_offset = 0; u32 copy_size = 0; std::vector inner_buffer; - bool is_linear; - Data& regs; + std::vector tmp_buffer; + bool is_linear = false; + Registers& regs; MemoryManager& memory_manager; }; diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 50f318848..5250b8d9b 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -51,7 +51,7 @@ public: struct { INSERT_PADDING_WORDS(0x60); - Upload::Data upload; + Upload::Registers upload; struct { union { @@ -131,7 +131,6 @@ public: BitField<30, 1, u32> linked_tsc; BitField<0, 31, u32> grid_dim_x; - union { BitField<0, 16, u32> grid_dim_y; BitField<16, 16, u32> grid_dim_z; @@ -142,7 +141,6 @@ public: BitField<0, 16, u32> shared_alloc; BitField<0, 31, u32> block_dim_x; - union { BitField<0, 16, u32> block_dim_y; BitField<16, 16, u32> block_dim_z; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 473bff20a..f3bc675a9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -47,7 +47,7 @@ public: struct { INSERT_PADDING_WORDS(0x60); - Upload::Data upload; + Upload::Registers upload; struct { union { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index eae427412..889723535 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -588,7 +588,7 @@ public: INSERT_PADDING_WORDS(0x17); - Upload::Data upload; + Upload::Registers upload; struct { union { BitField<0, 1, u32> linear; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 8eab1332e..e5942f671 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -70,13 +70,13 @@ public: static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); enum class ComponentMode : u32 { - SRC0 = 0, - SRC1 = 1, - SRC2 = 2, - SRC3 = 3, - CONST0 = 4, - CONST1 = 5, - ZERO = 6, + Src0 = 0, + Src1 = 1, + Src2 = 2, + Src3 = 3, + Const0 = 4, + Const1 = 5, + Zero = 6, }; enum class CopyMode : u32 { -- cgit v1.2.3