diff options
author | bunnei <bunneidev@gmail.com> | 2018-09-15 21:27:12 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-09-15 21:27:12 +0200 |
commit | ba480ea2fbb71bb8daa184a9ab82c3aee2e24681 (patch) | |
tree | ff8bea04a8de8757f0e287a7ecbef14ab0dbbfc3 | |
parent | Merge pull request #1271 from Subv/kepler_engine (diff) | |
parent | Shaders: Implemented multiple-word loads and stores to and from attribute memory. (diff) | |
download | yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.gz yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.bz2 yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.lz yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.xz yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.tar.zst yuzu-ba480ea2fbb71bb8daa184a9ab82c3aee2e24681.zip |
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 55 |
2 files changed, 58 insertions, 7 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 58f2904ce..d6e2397f2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -67,6 +67,13 @@ private: u64 value{}; }; +enum class AttributeSize : u64 { + Word = 0, + DoubleWord = 1, + TripleWord = 2, + QuadWord = 3, +}; + union Attribute { Attribute() = default; @@ -87,9 +94,10 @@ union Attribute { }; union { + BitField<20, 10, u64> immediate; BitField<22, 2, u64> element; BitField<24, 6, Index> index; - BitField<47, 3, u64> size; + BitField<47, 3, AttributeSize> size; } fmt20; union { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2d56370c7..81c0662d0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1772,13 +1772,34 @@ private: case OpCode::Type::Memory: { switch (opcode->GetId()) { case OpCode::Id::LD_A: { - ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); // Note: Shouldn't this be interp mode flat? As in no interpolation made. 
+ ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, + "Indirect attribute loads are not supported"); + ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, + "Unaligned attribute loads are not supported"); Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, Tegra::Shader::IpaSampleMode::Default}; - regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element, - instr.attribute.fmt20.index, input_mode); + + u32 next_element = instr.attribute.fmt20.element; + u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); + + const auto LoadNextElement = [&](u32 reg_offset) { + regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, + static_cast<Attribute::Index>(next_index), + input_mode); + + // Load the next attribute element into the following register. If the element + // to load goes beyond the vec4 size, load the first element of the next + // attribute. + next_element = (next_element + 1) % 4; + next_index = next_index + (next_element == 0 ? 
1 : 0); + }; + + const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; + for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { + LoadNextElement(reg_offset); + } break; } case OpCode::Id::LD_C: { @@ -1820,9 +1841,31 @@ private: break; } case OpCode::Id::ST_A: { - ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); - regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index, - instr.attribute.fmt20.element, instr.gpr0); + ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, + "Indirect attribute loads are not supported"); + ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, + "Unaligned attribute loads are not supported"); + + u32 next_element = instr.attribute.fmt20.element; + u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); + + const auto StoreNextElement = [&](u32 reg_offset) { + regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), + next_element, + instr.gpr0.Value() + reg_offset); + + // Store the following register into the next attribute element. If the element + // to store goes beyond the vec4 size, store into the first element of the next + // attribute. + next_element = (next_element + 1) % 4; + next_index = next_index + (next_element == 0 ? 1 : 0); + }; + + const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; + for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { + StoreNextElement(reg_offset); + } + break; } case OpCode::Id::TEX: { |