5 files changed, 39 insertions, 28 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8afd26fe9..bca014a4a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -13,8 +13,7 @@
 #include "video_core/renderer_base.h"
 #include "video_core/textures/texture.h"
 
-namespace Tegra {
-namespace Engines {
+namespace Tegra::Engines {
 
 /// First register id that is actually a Macro call.
 constexpr u32 MacroRegistersStart = 0xE00;
@@ -408,5 +407,4 @@ void Maxwell3D::ProcessClearBuffers() {
     rasterizer.Clear();
 }
 
-} // namespace Engines
-} // namespace Tegra
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index c8af1c6b6..0e09a7ee5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -643,8 +643,10 @@ public:
                 u32 d3d_cull_mode;
 
                 ComparisonOp depth_test_func;
+                float alpha_test_ref;
+                ComparisonOp alpha_test_func;
 
-                INSERT_PADDING_WORDS(0xB);
+                INSERT_PADDING_WORDS(0x9);
 
                 struct {
                     u32 separate_alpha;
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
index 59e28b22d..8b5f08351 100644
--- a/src/video_core/engines/maxwell_compute.cpp
+++ b/src/video_core/engines/maxwell_compute.cpp
@@ -6,8 +6,7 @@
 #include "core/core.h"
 #include "video_core/engines/maxwell_compute.h"
 
-namespace Tegra {
-namespace Engines {
+namespace Tegra::Engines {
 
 void MaxwellCompute::WriteReg(u32 method, u32 value) {
     ASSERT_MSG(method < Regs::NUM_REGS,
@@ -26,5 +25,4 @@ void MaxwellCompute::WriteReg(u32 method, u32 value) {
     }
 }
 
-} // namespace Engines
-} // namespace Tegra
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 103cd110e..b8a78cf82 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -7,8 +7,7 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/decoders.h"
 
-namespace Tegra {
-namespace Engines {
+namespace Tegra::Engines {
 
 MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
     : memory_manager(memory_manager), rasterizer{rasterizer} {}
@@ -78,9 +77,9 @@ void MaxwellDMA::HandleCopy() {
 
     ASSERT(regs.exec.enable_2d == 1);
 
-    std::size_t copy_size = regs.x_count * regs.y_count;
+    const std::size_t copy_size = regs.x_count * regs.y_count;
 
-    const auto FlushAndInvalidate = [&](u32 src_size, u32 dst_size) {
+    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
         rasterizer.FlushRegion(source_cpu, src_size);
@@ -91,14 +90,11 @@ void MaxwellDMA::HandleCopy() {
         rasterizer.InvalidateRegion(dest_cpu, dst_size);
     };
 
-    u8* src_buffer = Memory::GetPointer(source_cpu);
-    u8* dst_buffer = Memory::GetPointer(dest_cpu);
-
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
         ASSERT(regs.src_params.size_z == 1);
         // If the input is tiled and the output is linear, deswizzle the input and copy it over.
 
-        u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
 
         FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
                            copy_size * src_bytes_per_pixel);
@@ -111,7 +107,7 @@ void MaxwellDMA::HandleCopy() {
         ASSERT(regs.dst_params.size_z == 1);
         ASSERT(regs.src_pitch == regs.x_count);
 
-        u32 src_bpp = regs.src_pitch / regs.x_count;
+        const u32 src_bpp = regs.src_pitch / regs.x_count;
 
         FlushAndInvalidate(regs.src_pitch * regs.y_count,
                            regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
@@ -122,5 +118,4 @@ void MaxwellDMA::HandleCopy() {
     }
 }
 
-} // namespace Engines
-} // namespace Tegra
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e3d67ff87..6cd08d28b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,7 +214,7 @@ enum class IMinMaxExchange : u64 {
     XHi = 3,
 };
 
-enum class VmadType : u64 {
+enum class VideoType : u64 {
     Size16_Low = 0,
     Size16_High = 1,
     Size32 = 2,
@@ -564,6 +564,10 @@ union Instruction {
     } fmul;
 
     union {
+        BitField<55, 1, u64> saturate;
+    } fmul32;
+
+    union {
         BitField<48, 1, u64> is_signed;
     } shift;
 
@@ -753,7 +757,6 @@ union Instruction {
         BitField<45, 2, PredOperation> op;
         BitField<47, 1, u64> ftz;
         BitField<48, 4, PredCondition> cond;
-        BitField<56, 1, u64> neg_b;
     } fsetp;
 
     union {
@@ -780,6 +783,14 @@ union Instruction {
     } psetp;
 
     union {
+        BitField<43, 4, PredCondition> cond;
+        BitField<45, 2, PredOperation> op;
+        BitField<3, 3, u64> pred3;
+        BitField<0, 3, u64> pred0;
+        BitField<39, 3, u64> pred39;
+    } vsetp;
+
+    union {
         BitField<12, 3, u64> pred12;
         BitField<15, 1, u64> neg_pred12;
         BitField<24, 2, PredOperation> cond;
@@ -828,7 +839,6 @@ union Instruction {
         BitField<53, 1, u64> neg_b;
         BitField<54, 1, u64> abs_a;
         BitField<55, 1, u64> ftz;
-        BitField<56, 1, u64> neg_imm;
     } fset;
 
     union {
@@ -1152,15 +1162,17 @@ union Instruction {
     union {
         BitField<48, 1, u64> signed_a;
         BitField<38, 1, u64> is_byte_chunk_a;
-        BitField<36, 2, VmadType> type_a;
+        BitField<36, 2, VideoType> type_a;
         BitField<36, 2, u64> byte_height_a;
 
         BitField<49, 1, u64> signed_b;
         BitField<50, 1, u64> use_register_b;
         BitField<30, 1, u64> is_byte_chunk_b;
-        BitField<28, 2, VmadType> type_b;
+        BitField<28, 2, VideoType> type_b;
         BitField<28, 2, u64> byte_height_b;
+    } video;
 
+    union {
         BitField<51, 2, VmadShr> shr;
         BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
         BitField<47, 1, u64> cc;
@@ -1211,11 +1223,13 @@ public:
         KIL,
         SSY,
         SYNC,
+        BRK,
         DEPBAR,
         BFE_C,
         BFE_R,
         BFE_IMM,
         BRA,
+        PBK,
         LD_A,
         LD_C,
         ST_A,
@@ -1234,6 +1248,7 @@ public:
         OUT_R, // Emit vertex/primitive
         ISBERD,
         VMAD,
+        VSETP,
         FFMA_IMM, // Fused Multiply and Add
         FFMA_CR,
         FFMA_RC,
@@ -1372,7 +1387,7 @@ public:
     /// conditionally executed).
     static bool IsPredicatedInstruction(Id opcode) {
         // TODO(Subv): Add the rest of unpredicated instructions.
-        return opcode != Id::SSY;
+        return opcode != Id::SSY && opcode != Id::PBK;
     }
 
     class Matcher {
@@ -1468,9 +1483,11 @@ private:
 #define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
             INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
             INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
+            INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
             INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
+            INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
+            INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
             INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
-            INST("1111000011111---", Id::SYNC, Type::Synch, "SYNC"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
             INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
             INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
@@ -1489,6 +1506,7 @@ private:
             INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
             INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
             INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
+            INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"),
             INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
             INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
             INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -1608,4 +1626,4 @@ private:
     }
 };
 
-} // namespace Tegra::Shader
+} // namespace Tegra::Shader
\ No newline at end of file