author: bunnei <bunneidev@gmail.com> 2018-09-15 21:27:12 +0200
committer: GitHub <noreply@github.com> 2018-09-15 21:27:12 +0200
commit: ba480ea2fbb71bb8daa184a9ab82c3aee2e24681 (patch)
tree: ff8bea04a8de8757f0e287a7ecbef14ab0dbbfc3
parent: Merge pull request #1271 from Subv/kepler_engine (diff)
parent: Shaders: Implemented multiple-word loads and stores to and from attribute memory. (diff)
download: yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar
yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.gz
yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.bz2
yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.lz
yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.xz
yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.zst
yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.zip
2 files changed, 58 insertions, 7 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 58f2904ce..d6e2397f2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -67,6 +67,13 @@ private:
     u64 value{};
 };
 
+enum class AttributeSize : u64 {
+    Word = 0,       // 1 word (the decompiler uses size + 1 as the word count)
+    DoubleWord = 1, // 2 words
+    TripleWord = 2, // 3 words
+    QuadWord = 3,   // 4 words
+};
+
 union Attribute {
     Attribute() = default;
 
@@ -87,9 +94,10 @@ union Attribute {
     };
 
     union {
+        BitField<20, 10, u64> immediate;
         BitField<22, 2, u64> element;
         BitField<24, 6, Index> index;
-        BitField<47, 3, u64> size;
+        BitField<47, 3, AttributeSize> size;
     } fmt20;
 
     union {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2d56370c7..81c0662d0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1772,13 +1772,34 @@ private:
         case OpCode::Type::Memory: {
             switch (opcode->GetId()) {
             case OpCode::Id::LD_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute loads are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute loads are not supported");

                 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                                   Tegra::Shader::IpaSampleMode::Default};
-                regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
-                                                instr.attribute.fmt20.index, input_mode);
+
+                u32 next_element = instr.attribute.fmt20.element;
+                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+
+                const auto LoadNextElement = [&](u32 reg_offset) {
+                    regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
+                                                    static_cast<Attribute::Index>(next_index),
+                                                    input_mode);
+
+                    // Advance to the element that the following register will receive. If that
+                    // element goes beyond the vec4 size, wrap to the first element of the next
+                    // attribute.
+                    next_element = (next_element + 1) % 4;
+                    next_index = next_index + (next_element == 0 ? 1 : 0);
+                };
+
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    LoadNextElement(reg_offset);
+                }
                 break;
             }
             case OpCode::Id::LD_C: {
@@ -1820,9 +1841,31 @@ private:
                 break;
             }
             case OpCode::Id::ST_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
-                regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
-                                                  instr.attribute.fmt20.element, instr.gpr0);
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute stores are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute stores are not supported");
+
+                u32 next_element = instr.attribute.fmt20.element;
+                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+
+                const auto StoreNextElement = [&](u32 reg_offset) {
+                    regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
+                                                      next_element,
+                                                      instr.gpr0.Value() + reg_offset);
+
+                    // Store the following register into the next attribute element. If the element
+                    // to store goes beyond the vec4 size, store to the first element of the next
+                    // attribute.
+                    next_element = (next_element + 1) % 4;
+                    next_index = next_index + (next_element == 0 ? 1 : 0);
+                };
+
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    StoreNextElement(reg_offset);
+                }
+
                 break;
             }
             case OpCode::Id::TEX: {
author	bunnei <bunneidev@gmail.com>	2018-09-15 21:27:12 +0200
committer	GitHub <noreply@github.com>	2018-09-15 21:27:12 +0200
commit	ba480ea2fbb71bb8daa184a9ab82c3aee2e24681 (patch)
tree	ff8bea04a8de8757f0e287a7ecbef14ab0dbbfc3
parent	Merge pull request #1271 from Subv/kepler_engine (diff)
parent	Shaders: Implemented multiple-word loads and stores to and from attribute memory. (diff)
download	yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.gz yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.bz2 yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.lz yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.xz yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.zst yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.zip