summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 61
-rw-r--r-- src/video_core/engines/fermi_2d.h | 89
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 24
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 21
4 files changed, 178 insertions, 17 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 7aab163dc..9019f2504 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,71 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/textures/decoders.h"
namespace Tegra {
namespace Engines {
-void Fermi2D::WriteReg(u32 method, u32 value) {}
+/// Builds the engine and keeps a reference to the memory manager, which the surface copy
+/// path uses to translate GPU virtual addresses.
+Fermi2D::Fermi2D(MemoryManager& memory_manager)
+    : memory_manager(memory_manager) {}
+
+/// Stores `value` in the register selected by `method`, then runs the side effect attached
+/// to that register, if it has one.
+void Fermi2D::WriteReg(u32 method, u32 value) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid Fermi2D register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = value;
+
+    // The trigger register is the only one with a side effect: writing it starts the copy
+    // described by the rest of the register state.
+    if (method == FERMI2D_REG_INDEX(trigger)) {
+        HandleSurfaceCopy();
+    }
+}
+
+/// Copies the source surface to the destination surface as configured in the registers.
+///
+/// Only SrcCopy operations between surfaces of the same format and the same pixel count are
+/// handled; any other configuration trips an assertion. When both surfaces share a layout the
+/// data is copied verbatim, otherwise it is swizzled or deswizzled on the way.
+void Fermi2D::HandleSurfaceCopy() {
+    NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
+                  static_cast<u32>(regs.operation));
+
+    const GPUVAddr source = regs.src.Address();
+    const GPUVAddr dest = regs.dst.Address();
+
+    // TODO(Subv): Only same-format and same-size copies are allowed for now.
+    ASSERT(regs.src.format == regs.dst.format);
+    ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
+
+    // TODO(Subv): Only raw copies are implemented.
+    ASSERT(regs.operation == Regs::Operation::SrcCopy);
+
+    // The translation results are dereferenced below, so verify that both GPU addresses
+    // actually resolved before touching them instead of dereferencing an empty result.
+    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
+    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
+    ASSERT_MSG(source_cpu, "Fermi2D source surface address is not mapped");
+    ASSERT_MSG(dest_cpu, "Fermi2D destination surface address is not mapped");
+
+    const u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
+    const u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
+
+    if (regs.src.linear == regs.dst.linear) {
+        // If the input layout and the output layout are the same, just perform a raw copy.
+        Memory::CopyBlock(*dest_cpu, *source_cpu,
+                          src_bytes_per_pixel * regs.dst.width * regs.dst.height);
+        return;
+    }
+
+    u8* src_buffer = Memory::GetPointer(*source_cpu);
+    u8* dst_buffer = Memory::GetPointer(*dest_cpu);
+
+    if (!regs.src.linear && regs.dst.linear) {
+        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
+                                  dst_bytes_per_pixel, src_buffer, dst_buffer, true,
+                                  regs.src.block_height);
+    } else {
+        // If the input is linear and the output is tiled, swizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
+                                  dst_bytes_per_pixel, dst_buffer, src_buffer, false,
+                                  regs.dst.block_height);
+    }
+}
} // namespace Engines
} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8967ddede..0c5b413cc 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -4,19 +4,106 @@
#pragma once
+#include <array>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
namespace Tegra {
namespace Engines {
+/// Converts the name of a Fermi2D::Regs field into its register index, i.e. the field's byte
+/// offset inside Regs divided by the size of one 32-bit register.
+#define FERMI2D_REG_INDEX(field_name) \
+ (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
+
class Fermi2D final {
public:
- Fermi2D() = default;
+    /// Constructs the engine around the memory manager used to translate GPU addresses.
+    explicit Fermi2D(MemoryManager& memory_manager);
~Fermi2D() = default;
/// Write the value to the register identified by method.
void WriteReg(u32 method, u32 value);
+
+    /// Register state of the engine, viewable either as named fields or as a flat array of
+    /// 32-bit words indexed by method.
+    struct Regs {
+        /// Number of 32-bit registers; WriteReg rejects any method at or above this value.
+        static constexpr size_t NUM_REGS = 0x258;
+
+        /// Description of one surface (source or destination) taking part in a copy.
+        struct Surface {
+            RenderTargetFormat format;
+            /// Non-zero when the surface is stored linearly instead of tiled.
+            BitField<0, 1, u32> linear;
+            union {
+                BitField<0, 4, u32> block_depth;
+                BitField<4, 4, u32> block_height;
+                BitField<8, 4, u32> block_width;
+            };
+            u32 depth;
+            u32 layer;
+            u32 pitch;
+            u32 width;
+            u32 height;
+            u32 address_high;
+            u32 address_low;
+
+            /// Combines the high and low address words into a single GPU virtual address.
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                             address_low);
+            }
+        };
+        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+
+        enum class Operation : u32 {
+            SrcCopyAnd = 0,
+            ROPAnd = 1,
+            Blend = 2,
+            SrcCopy = 3,
+            ROP = 4,
+            SrcCopyPremult = 5,
+            BlendPremult = 6,
+        };
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x80);
+
+                Surface dst;
+
+                INSERT_PADDING_WORDS(2);
+
+                Surface src;
+
+                INSERT_PADDING_WORDS(0x15);
+
+                Operation operation;
+
+                INSERT_PADDING_WORDS(0x9);
+
+                // TODO(Subv): This is only a guess.
+                u32 trigger;
+
+                // Pads the named view out to exactly NUM_REGS words: trigger sits at 0xB5, so
+                // 0xB6 + 0x1A2 == 0x258. One more word would make it larger than reg_array.
+                INSERT_PADDING_WORDS(0x1A2);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Fermi2D Regs has wrong size");
+
+    MemoryManager& memory_manager;
+
+private:
+    /// Performs the copy from the source surface to the destination surface as configured in
+    /// the registers.
+    void HandleSurfaceCopy();
};
+// Compile-time checks that each named register sits at its expected method index; `position`
+// is given in 32-bit words and compared against the field's byte offset.
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(dst, 0x80);
+ASSERT_REG_POSITION(src, 0x8C);
+ASSERT_REG_POSITION(operation, 0xAB);
+ASSERT_REG_POSITION(trigger, 0xB5);
+// The helper is only meaningful for the checks above, so it is not leaked to includers.
+#undef ASSERT_REG_POSITION
+
} // namespace Engines
} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 4e9aed380..4306b894f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,10 +22,6 @@ constexpr u32 MacroRegistersStart = 0xE00;
+/// Stores the memory manager reference and hands the macro interpreter a reference to this
+/// engine.
Maxwell3D::Maxwell3D(MemoryManager& memory_manager)
: memory_manager(memory_manager), macro_interpreter(*this) {}
-void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) {
- uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code);
-}
-
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
auto macro_code = uploaded_macros.find(method);
// The requested macro must have been uploaded already.
@@ -37,9 +33,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
}
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
- ASSERT_MSG(method < Regs::NUM_REGS,
- "Invalid Maxwell3D register, increase the size of the Regs structure");
-
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
// It is an error to write to a register other than the current macro's ARG register before it
@@ -68,6 +61,9 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
return;
}
+ ASSERT_MSG(method < Regs::NUM_REGS,
+ "Invalid Maxwell3D register, increase the size of the Regs structure");
+
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
@@ -75,6 +71,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
regs.reg_array[method] = value;
switch (method) {
+ case MAXWELL3D_REG_INDEX(macros.data): {
+ ProcessMacroUpload(value);
+ break;
+ }
case MAXWELL3D_REG_INDEX(code_address.code_address_high):
case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
// Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS
@@ -141,6 +141,12 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
}
}
+/// Appends one uploaded word of macro code to the macro selected by the `macros.entry`
+/// register, so the code can be interpreted when the macro is called.
+void Maxwell3D::ProcessMacroUpload(u32 data) {
+    // Macros are keyed by the method that invokes them: every entry occupies two methods,
+    // starting at MacroRegistersStart.
+    const u32 macro_method = regs.macros.entry * 2 + MacroRegistersStart;
+    uploaded_macros[macro_method].push_back(data);
+}
+
void Maxwell3D::ProcessQueryGet() {
GPUVAddr sequence_address = regs.query.QueryAddress();
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
@@ -186,8 +192,8 @@ void Maxwell3D::ProcessQueryGet() {
}
void Maxwell3D::DrawArrays() {
- LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
- regs.vertex_buffer.count);
+ NGLOG_DEBUG(HW_GPU, "called, topology={}, count={}",
+ static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index a022665eb..5cf62fb01 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -31,7 +31,7 @@ public:
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
- static constexpr size_t NUM_REGS = 0xE36;
+ static constexpr size_t NUM_REGS = 0xE00;
static constexpr size_t NumRenderTargets = 8;
static constexpr size_t NumViewports = 16;
@@ -322,7 +322,15 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x200);
+ INSERT_PADDING_WORDS(0x45);
+
+ struct {
+ INSERT_PADDING_WORDS(1);
+ u32 data;
+ u32 entry;
+ } macros;
+
+ INSERT_PADDING_WORDS(0x1B8);
struct {
u32 address_high;
@@ -605,7 +613,7 @@ public:
u32 size[MaxShaderStage];
} tex_info_buffers;
- INSERT_PADDING_WORDS(0x102);
+ INSERT_PADDING_WORDS(0xCC);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -637,9 +645,6 @@ public:
/// Write the value to the register identified by method.
void WriteReg(u32 method, u32 value, u32 remaining_params);
- /// Uploads the code for a GPU macro program associated with the specified entry.
- void SubmitMacroCode(u32 entry, std::vector<u32> code);
-
/// Returns a list of enabled textures for the specified shader stage.
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
@@ -670,6 +675,9 @@ private:
*/
void CallMacroMethod(u32 method, std::vector<u32> parameters);
+ /// Handles writes to the macro uploading registers.
+ void ProcessMacroUpload(u32 data);
+
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
@@ -687,6 +695,7 @@ private:
static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
+ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform[0], 0x280);
ASSERT_REG_POSITION(viewport, 0x300);