summaryrefslogtreecommitdiffstats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines')
-rw-r--r--src/video_core/engines/fermi_2d.cpp89
-rw-r--r--src/video_core/engines/fermi_2d.h331
-rw-r--r--src/video_core/engines/kepler_compute.cpp26
-rw-r--r--src/video_core/engines/kepler_compute.h5
-rw-r--r--src/video_core/engines/maxwell_3d.cpp45
-rw-r--r--src/video_core/engines/maxwell_3d.h127
-rw-r--r--src/video_core/engines/maxwell_dma.cpp3
7 files changed, 377 insertions, 249 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 4293d676c..a01d334ad 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,11 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D() = default;
+Fermi2D::Fermi2D() {
+ // Nvidia's OpenGL driver seems to assume these values
+ regs.src.depth = 1;
+ regs.dst.depth = 1;
+}
Fermi2D::~Fermi2D() = default;
@@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Fermi2D register, increase the size of the Regs structure");
-
regs.reg_array[method] = method_argument;
- switch (method) {
- // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
- // so trigger on the second 32-bit write.
- case FERMI2D_REG_INDEX(blit_src_y) + 1: {
- HandleSurfaceCopy();
- break;
- }
+ if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
+ Blit();
}
}
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ for (u32 i = 0; i < amount; ++i) {
+ CallMethod(method, base_start[i], methods_pending - i <= 1);
}
}
-static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
- const u32 line_a = src_2 - src_1;
- const u32 line_b = dst_2 - dst_1;
- const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
- return {line_b - (excess * line_b) / line_a, excess};
-}
-
-void Fermi2D::HandleSurfaceCopy() {
- LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
+void Fermi2D::Blit() {
+ LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
+ regs.src.Address(), regs.dst.Address());
- // TODO(Subv): Only raw copies are implemented.
- ASSERT(regs.operation == Operation::SrcCopy);
+ UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
+ UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
+ UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
+ UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
+ UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
- const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
- const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
- u32 src_blit_x2, src_blit_y2;
- if (regs.blit_control.origin == Origin::Corner) {
- src_blit_x2 =
- static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
- src_blit_y2 =
- static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
- } else {
- src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
- src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
- }
- u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
- u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
- const auto [new_dst_w, src_excess_x] =
- DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
- const auto [new_dst_h, src_excess_y] =
- DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
- dst_blit_x2 = new_dst_w + regs.blit_dst_x;
- src_blit_x2 = src_blit_x2 - src_excess_x;
- dst_blit_y2 = new_dst_h + regs.blit_dst_y;
- src_blit_y2 = src_blit_y2 - src_excess_y;
- const auto [new_src_w, dst_excess_x] =
- DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
- const auto [new_src_h, dst_excess_y] =
- DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
- src_blit_x2 = new_src_w + src_blit_x1;
- dst_blit_x2 = dst_blit_x2 - dst_excess_x;
- src_blit_y2 = new_src_h + src_blit_y1;
- dst_blit_y2 = dst_blit_y2 - dst_excess_y;
- const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
- const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
- dst_blit_y2};
- const Config copy_config{
+ const auto& args = regs.pixels_from_memory;
+ const Config config{
.operation = regs.operation,
- .filter = regs.blit_control.filter,
- .src_rect = src_rect,
- .dst_rect = dst_rect,
+ .filter = args.sample_mode.filter,
+ .dst_x0 = args.dst_x0,
+ .dst_y0 = args.dst_y0,
+ .dst_x1 = args.dst_x0 + args.dst_width,
+ .dst_y1 = args.dst_y0 + args.dst_height,
+ .src_x0 = static_cast<s32>(args.src_x0 >> 32),
+ .src_y0 = static_cast<s32>(args.src_y0 >> 32),
+ .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
+ .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
- if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+ if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
UNIMPLEMENTED();
}
}
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0909709ec..81522988e 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -53,8 +53,8 @@ public:
};
enum class Filter : u32 {
- PointSample = 0, // Nearest
- Linear = 1,
+ Point = 0,
+ Bilinear = 1,
};
enum class Operation : u32 {
@@ -67,88 +67,235 @@ public:
BlendPremult = 6,
};
- struct Regs {
- static constexpr std::size_t NUM_REGS = 0x258;
+ enum class MemoryLayout : u32 {
+ BlockLinear = 0,
+ Pitch = 1,
+ };
- struct Surface {
- RenderTargetFormat format;
- BitField<0, 1, u32> linear;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- };
- u32 depth;
- u32 layer;
- u32 pitch;
- u32 width;
- u32 height;
- u32 address_high;
- u32 address_low;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
-
- u32 BlockWidth() const {
- return block_width.Value();
- }
-
- u32 BlockHeight() const {
- return block_height.Value();
- }
-
- u32 BlockDepth() const {
- return block_depth.Value();
- }
- };
- static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+ enum class CpuIndexWrap : u32 {
+ Wrap = 0,
+ NoWrap = 1,
+ };
+ struct Surface {
+ RenderTargetFormat format;
+ MemoryLayout linear;
union {
- struct {
- INSERT_UNION_PADDING_WORDS(0x80);
+ BitField<0, 4, u32> block_width;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_depth;
+ };
+ u32 depth;
+ u32 layer;
+ u32 pitch;
+ u32 width;
+ u32 height;
+ u32 addr_upper;
+ u32 addr_lower;
+
+ [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
+ return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
+ }
+ };
+ static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
- Surface dst;
+ enum class SectorPromotion : u32 {
+ NoPromotion = 0,
+ PromoteTo2V = 1,
+ PromoteTo2H = 2,
+ PromoteTo4 = 3,
+ };
+
+ enum class NumTpcs : u32 {
+ All = 0,
+ One = 1,
+ };
- INSERT_UNION_PADDING_WORDS(2);
+ enum class RenderEnableMode : u32 {
+ False = 0,
+ True = 1,
+ Conditional = 2,
+ RenderIfEqual = 3,
+ RenderIfNotEqual = 4,
+ };
- Surface src;
+ enum class ColorKeyFormat : u32 {
+ A16R56G6B5 = 0,
+ A1R5G55B5 = 1,
+ A8R8G8B8 = 2,
+ A2R10G10B10 = 3,
+ Y8 = 4,
+ Y16 = 5,
+ Y32 = 6,
+ };
- INSERT_UNION_PADDING_WORDS(0x15);
+ union Beta4 {
+ BitField<0, 8, u32> b;
+ BitField<8, 8, u32> g;
+ BitField<16, 8, u32> r;
+ BitField<24, 8, u32> a;
+ };
- Operation operation;
+ struct Point {
+ u32 x;
+ u32 y;
+ };
- INSERT_UNION_PADDING_WORDS(0x177);
+ enum class PatternSelect : u32 {
+ MonoChrome8x8 = 0,
+ MonoChrome64x1 = 1,
+ MonoChrome1x64 = 2,
+ Color = 3,
+ };
+ enum class NotifyType : u32 {
+ WriteOnly = 0,
+ WriteThenAwaken = 1,
+ };
+
+ enum class MonochromePatternColorFormat : u32 {
+ A8X8R8G6B5 = 0,
+ A1R5G5B5 = 1,
+ A8R8G8B8 = 2,
+ A8Y8 = 3,
+ A8X8Y16 = 4,
+ Y32 = 5,
+ };
+
+ enum class MonochromePatternFormat : u32 {
+ CGA6_M1 = 0,
+ LE_M1 = 1,
+ };
+
+ union Regs {
+ static constexpr std::size_t NUM_REGS = 0x258;
+ struct {
+ u32 object;
+ INSERT_UNION_PADDING_WORDS(0x3F);
+ u32 no_operation;
+ NotifyType notify;
+ INSERT_UNION_PADDING_WORDS(0x2);
+ u32 wait_for_idle;
+ INSERT_UNION_PADDING_WORDS(0xB);
+ u32 pm_trigger;
+ INSERT_UNION_PADDING_WORDS(0xF);
+ u32 context_dma_notify;
+ u32 dst_context_dma;
+ u32 src_context_dma;
+ u32 semaphore_context_dma;
+ INSERT_UNION_PADDING_WORDS(0x1C);
+ Surface dst;
+ CpuIndexWrap pixels_from_cpu_index_wrap;
+ u32 kind2d_check_enable;
+ Surface src;
+ SectorPromotion pixels_from_memory_sector_promotion;
+ INSERT_UNION_PADDING_WORDS(0x1);
+ NumTpcs num_tpcs;
+ u32 render_enable_addr_upper;
+ u32 render_enable_addr_lower;
+ RenderEnableMode render_enable_mode;
+ INSERT_UNION_PADDING_WORDS(0x4);
+ u32 clip_x0;
+ u32 clip_y0;
+ u32 clip_width;
+ u32 clip_height;
+ BitField<0, 1, u32> clip_enable;
+ BitField<0, 3, ColorKeyFormat> color_key_format;
+ u32 color_key;
+ BitField<0, 1, u32> color_key_enable;
+ BitField<0, 8, u32> rop;
+ u32 beta1;
+ Beta4 beta4;
+ Operation operation;
+ union {
+ BitField<0, 6, u32> x;
+ BitField<8, 6, u32> y;
+ } pattern_offset;
+ BitField<0, 2, PatternSelect> pattern_select;
+ INSERT_UNION_PADDING_WORDS(0xC);
+ struct {
+ BitField<0, 3, MonochromePatternColorFormat> color_format;
+ BitField<0, 1, MonochromePatternFormat> format;
+ u32 color0;
+ u32 color1;
+ u32 pattern0;
+ u32 pattern1;
+ } monochrome_pattern;
+ struct {
+ std::array<u32, 0x40> X8R8G8B8;
+ std::array<u32, 0x20> R5G6B5;
+ std::array<u32, 0x20> X1R5G5B5;
+ std::array<u32, 0x10> Y8;
+ } color_pattern;
+ INSERT_UNION_PADDING_WORDS(0x10);
+ struct {
+ u32 prim_mode;
+ u32 prim_color_format;
+ u32 prim_color;
+ u32 line_tie_break_bits;
+ INSERT_UNION_PADDING_WORDS(0x14);
+ u32 prim_point_xy;
+ INSERT_UNION_PADDING_WORDS(0x7);
+ std::array<Point, 0x40> prim_point;
+ } render_solid;
+ struct {
+ u32 data_type;
+ u32 color_format;
+ u32 index_format;
+ u32 mono_format;
+ u32 wrap;
+ u32 color0;
+ u32 color1;
+ u32 mono_opacity;
+ INSERT_UNION_PADDING_WORDS(0x6);
+ u32 src_width;
+ u32 src_height;
+ u32 dx_du_frac;
+ u32 dx_du_int;
+ u32 dx_dv_frac;
+ u32 dy_dv_int;
+ u32 dst_x0_frac;
+ u32 dst_x0_int;
+ u32 dst_y0_frac;
+ u32 dst_y0_int;
+ u32 data;
+ } pixels_from_cpu;
+ INSERT_UNION_PADDING_WORDS(0x3);
+ u32 big_endian_control;
+ INSERT_UNION_PADDING_WORDS(0x3);
+ struct {
+ BitField<0, 3, u32> block_shape;
+ BitField<0, 5, u32> corral_size;
+ BitField<0, 1, u32> safe_overlap;
union {
- u32 raw;
BitField<0, 1, Origin> origin;
BitField<4, 1, Filter> filter;
- } blit_control;
-
+ } sample_mode;
INSERT_UNION_PADDING_WORDS(0x8);
-
- u32 blit_dst_x;
- u32 blit_dst_y;
- u32 blit_dst_width;
- u32 blit_dst_height;
- u64 blit_du_dx;
- u64 blit_dv_dy;
- u64 blit_src_x;
- u64 blit_src_y;
-
- INSERT_UNION_PADDING_WORDS(0x21);
- };
- std::array<u32, NUM_REGS> reg_array;
+ s32 dst_x0;
+ s32 dst_y0;
+ s32 dst_width;
+ s32 dst_height;
+ s64 du_dx;
+ s64 dv_dy;
+ s64 src_x0;
+ s64 src_y0;
+ } pixels_from_memory;
};
+ std::array<u32, NUM_REGS> reg_array;
} regs{};
struct Config {
- Operation operation{};
- Filter filter{};
- Common::Rectangle<u32> src_rect;
- Common::Rectangle<u32> dst_rect;
+ Operation operation;
+ Filter filter;
+ s32 dst_x0;
+ s32 dst_y0;
+ s32 dst_x1;
+ s32 dst_y1;
+ s32 src_x0;
+ s32 src_y0;
+ s32 src_x1;
+ s32 src_y1;
};
private:
@@ -156,25 +303,49 @@ private:
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
- void HandleSurfaceCopy();
+ void Blit();
};
#define ASSERT_REG_POSITION(field_name, position) \
- static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
+ static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(dst, 0x80);
-ASSERT_REG_POSITION(src, 0x8C);
-ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(blit_control, 0x223);
-ASSERT_REG_POSITION(blit_dst_x, 0x22c);
-ASSERT_REG_POSITION(blit_dst_y, 0x22d);
-ASSERT_REG_POSITION(blit_dst_width, 0x22e);
-ASSERT_REG_POSITION(blit_dst_height, 0x22f);
-ASSERT_REG_POSITION(blit_du_dx, 0x230);
-ASSERT_REG_POSITION(blit_dv_dy, 0x232);
-ASSERT_REG_POSITION(blit_src_x, 0x234);
-ASSERT_REG_POSITION(blit_src_y, 0x236);
+ASSERT_REG_POSITION(object, 0x0);
+ASSERT_REG_POSITION(no_operation, 0x100);
+ASSERT_REG_POSITION(notify, 0x104);
+ASSERT_REG_POSITION(wait_for_idle, 0x110);
+ASSERT_REG_POSITION(pm_trigger, 0x140);
+ASSERT_REG_POSITION(context_dma_notify, 0x180);
+ASSERT_REG_POSITION(dst_context_dma, 0x184);
+ASSERT_REG_POSITION(src_context_dma, 0x188);
+ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
+ASSERT_REG_POSITION(dst, 0x200);
+ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
+ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
+ASSERT_REG_POSITION(src, 0x230);
+ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
+ASSERT_REG_POSITION(num_tpcs, 0x260);
+ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
+ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
+ASSERT_REG_POSITION(clip_x0, 0x280);
+ASSERT_REG_POSITION(clip_y0, 0x284);
+ASSERT_REG_POSITION(clip_width, 0x288);
+ASSERT_REG_POSITION(clip_height, 0x28c);
+ASSERT_REG_POSITION(clip_enable, 0x290);
+ASSERT_REG_POSITION(color_key_format, 0x294);
+ASSERT_REG_POSITION(color_key, 0x298);
+ASSERT_REG_POSITION(rop, 0x2A0);
+ASSERT_REG_POSITION(beta1, 0x2A4);
+ASSERT_REG_POSITION(beta4, 0x2A8);
+ASSERT_REG_POSITION(operation, 0x2AC);
+ASSERT_REG_POSITION(pattern_offset, 0x2B0);
+ASSERT_REG_POSITION(pattern_select, 0x2B4);
+ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
+ASSERT_REG_POSITION(color_pattern, 0x300);
+ASSERT_REG_POSITION(render_solid, 0x580);
+ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
+ASSERT_REG_POSITION(big_endian_control, 0x870);
+ASSERT_REG_POSITION(pixels_from_memory, 0x880);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 898370739..ba387506e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
}
}
-Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
- const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
- ASSERT(cbuf_mask[regs.tex_cb_index]);
-
- const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
- ASSERT(texinfo.Address() != 0);
-
- const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
- ASSERT(address < texinfo.Address() + texinfo.size);
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
- return GetTextureInfo(tex_handle);
-}
-
-Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
- return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
-}
-
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
ASSERT(stage == ShaderType::Compute);
const auto& buffer = launch_description.const_buffer_config[const_buffer];
@@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
- const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
- result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
+ const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
+
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
+ result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7f2500aab..51a041202 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -209,11 +209,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
- Texture::FullTextureInfo GetTexture(std::size_t offset) const;
-
- /// Given a texture handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
-
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 761962ed0..9be651e24 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <cinttypes>
#include <cstring>
#include <optional>
#include "common/assert.h"
@@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
OnMemoryWrite();
}
return;
+ case MAXWELL3D_REG_INDEX(fragment_barrier):
+ return rasterizer->FragmentBarrier();
+ case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
+ return rasterizer->TiledCacheBarrier();
}
}
@@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() {
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
- const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
+ const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
- const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
+ const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry;
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}
-Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
- return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
-}
-
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
- const auto stage_index = static_cast<std::size_t>(stage);
- const auto& shader = state.shader_stages[stage_index];
- const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
- ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
- const GPUVAddr tex_info_address =
- tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
-
- ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
-
- return GetTextureInfo(tex_handle);
-}
-
u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];
}
void Maxwell3D::ProcessClearBuffers() {
- ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
- regs.clear_buffers.R == regs.clear_buffers.B &&
- regs.clear_buffers.R == regs.clear_buffers.A);
-
rasterizer->Clear();
}
@@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
- u32 result;
- std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
- return result;
+ return memory_manager.Read<u32>(buffer.address + offset);
}
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
@@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
- const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
- result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
+ const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
+
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
+ result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 564acbc53..bf9e07c9b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -438,16 +438,6 @@ public:
DecrWrapOGL = 0x8508,
};
- enum class MemoryLayout : u32 {
- Linear = 0,
- BlockLinear = 1,
- };
-
- enum class InvMemoryLayout : u32 {
- BlockLinear = 0,
- Linear = 1,
- };
-
enum class CounterReset : u32 {
SampleCnt = 0x01,
Unk02 = 0x02,
@@ -589,21 +579,31 @@ public:
NegativeW = 7,
};
+ enum class SamplerIndex : u32 {
+ Independently = 0,
+ ViaHeaderIndex = 1,
+ };
+
+ struct TileMode {
+ union {
+ BitField<0, 4, u32> block_width;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_depth;
+ BitField<12, 1, u32> is_pitch_linear;
+ BitField<16, 1, u32> is_3d;
+ };
+ };
+ static_assert(sizeof(TileMode) == 4);
+
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
+ TileMode tile_mode;
union {
- BitField<0, 3, u32> block_width;
- BitField<4, 3, u32> block_height;
- BitField<8, 3, u32> block_depth;
- BitField<12, 1, InvMemoryLayout> type;
- BitField<16, 1, u32> is_3d;
- } memory_layout;
- union {
- BitField<0, 16, u32> layers;
+ BitField<0, 16, u32> depth;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
@@ -832,7 +832,11 @@ public:
u32 patch_vertices;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0x4);
+
+ u32 fragment_barrier;
+
+ INSERT_UNION_PADDING_WORDS(0x7);
std::array<ScissorTest, NumViewports> scissor_test;
@@ -842,7 +846,15 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0x5);
+
+ u32 invalidate_texture_data_cache;
+
+ INSERT_UNION_PADDING_WORDS(0x1);
+
+ u32 tiled_cache_barrier;
+
+ INSERT_UNION_PADDING_WORDS(0x4);
u32 color_mask_common;
@@ -866,12 +878,7 @@ public:
u32 address_high;
u32 address_low;
Tegra::DepthFormat format;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- BitField<20, 1, InvMemoryLayout> type;
- } memory_layout;
+ TileMode tile_mode;
u32 layer_stride;
GPUVAddr Address() const {
@@ -880,7 +887,18 @@ public:
}
} zeta;
- INSERT_UNION_PADDING_WORDS(0x41);
+ struct {
+ union {
+ BitField<0, 16, u32> x;
+ BitField<16, 16, u32> width;
+ };
+ union {
+ BitField<0, 16, u32> y;
+ BitField<16, 16, u32> height;
+ };
+ } render_area;
+
+ INSERT_UNION_PADDING_WORDS(0x3F);
union {
BitField<0, 4, u32> stencil;
@@ -921,7 +939,7 @@ public:
BitField<25, 3, u32> map_7;
};
- u32 GetMap(std::size_t index) const {
+ u32 Map(std::size_t index) const {
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
map_4, map_5, map_6, map_7};
ASSERT(index < maps.size());
@@ -934,11 +952,13 @@ public:
u32 zeta_width;
u32 zeta_height;
union {
- BitField<0, 16, u32> zeta_layers;
+ BitField<0, 16, u32> zeta_depth;
BitField<16, 1, u32> zeta_volume;
};
- INSERT_UNION_PADDING_WORDS(0x26);
+ SamplerIndex sampler_index;
+
+ INSERT_UNION_PADDING_WORDS(0x25);
u32 depth_test_enable;
@@ -964,6 +984,7 @@ public:
float b;
float a;
} blend_color;
+
INSERT_UNION_PADDING_WORDS(0x4);
struct {
@@ -1001,7 +1022,12 @@ public:
float line_width_smooth;
float line_width_aliased;
- INSERT_UNION_PADDING_WORDS(0x1F);
+ INSERT_UNION_PADDING_WORDS(0x1B);
+
+ u32 invalidate_sampler_cache_no_wfi;
+ u32 invalidate_texture_header_cache_no_wfi;
+
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 vb_element_base;
u32 vb_base_instance;
@@ -1045,13 +1071,13 @@ public:
} condition;
struct {
- u32 tsc_address_high;
- u32 tsc_address_low;
- u32 tsc_limit;
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
- GPUVAddr TSCAddress() const {
- return static_cast<GPUVAddr>(
- (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
}
} tsc;
@@ -1062,13 +1088,13 @@ public:
u32 line_smooth_enable;
struct {
- u32 tic_address_high;
- u32 tic_address_low;
- u32 tic_limit;
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
- GPUVAddr TICAddress() const {
- return static_cast<GPUVAddr>(
- (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
}
} tic;
@@ -1397,12 +1423,6 @@ public:
void FlushMMEInlineDraw();
- /// Given a texture handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
-
- /// Returns the texture information for a specific texture in a specific shader stage.
- Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
-
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
+ASSERT_REG_POSITION(fragment_barrier, 0x378);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
+ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
ASSERT_REG_POSITION(zeta, 0x3F8);
+ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
-ASSERT_REG_POSITION(zeta_layers, 0x48c);
+ASSERT_REG_POSITION(zeta_depth, 0x48c);
+ASSERT_REG_POSITION(sampler_index, 0x48D);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
+ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
+ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 1c29e895e..ba750748c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
}
void MaxwellDMA::CopyBlockLinearToPitch() {
+ UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
@@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
}
void MaxwellDMA::CopyPitchToBlockLinear() {
+ UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
+
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
const u32 width = dst_params.width;