summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/dma_pusher.cpp7
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h22
-rw-r--r--src/video_core/gpu_thread.h6
-rw-r--r--src/video_core/macro_interpreter.cpp4
-rw-r--r--src/video_core/rasterizer_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_device.h14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp313
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp54
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h28
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp7
-rw-r--r--src/video_core/shader/decode/memory.cpp30
-rw-r--r--src/video_core/shader/decode/other.cpp13
-rw-r--r--src/video_core/shader/shader_ir.cpp20
-rw-r--r--src/video_core/shader/shader_ir.h86
-rw-r--r--src/video_core/shader/track.cpp12
17 files changed, 398 insertions, 231 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 036e66f05..3175579cc 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -40,6 +40,13 @@ bool DmaPusher::Step() {
}
const CommandList& command_list{dma_pushbuffer.front()};
+ ASSERT_OR_EXECUTE(!command_list.empty(), {
+ // Somehow the command_list is empty, in order to avoid a crash
+ // We ignore it and assume its size is 0.
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ return true;
+ });
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 48e4fec33..f342c78e6 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -59,6 +59,7 @@ public:
static constexpr std::size_t NumCBData = 16;
static constexpr std::size_t NumVertexArrays = 32;
static constexpr std::size_t NumVertexAttributes = 32;
+ static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumTextureSamplers = 32;
static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t MaxShaderProgram = 6;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e5b4eadea..7bbc556da 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -98,6 +98,10 @@ union Attribute {
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
BitField<47, 3, AttributeSize> size;
+
+ bool IsPhysical() const {
+ return element == 0 && static_cast<u64>(index.Value()) == 0;
+ }
} fmt20;
union {
@@ -499,6 +503,11 @@ enum class SystemVariable : u64 {
CircularQueueEntryAddressHigh = 0x63,
};
+enum class PhysicalAttributeDirection : u64 {
+ Input = 0,
+ Output = 1,
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -587,6 +596,7 @@ union Instruction {
} alu;
union {
+ BitField<38, 1, u64> idx;
BitField<51, 1, u64> saturate;
BitField<52, 2, IpaSampleMode> sample_mode;
BitField<54, 2, IpaInterpMode> interp_mode;
@@ -812,6 +822,12 @@ union Instruction {
} stg;
union {
+ BitField<32, 1, PhysicalAttributeDirection> direction;
+ BitField<47, 3, AttributeSize> size;
+ BitField<20, 11, u64> address;
+ } al2p;
+
+ union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<7, 1, u64> abs_a;
@@ -1374,8 +1390,9 @@ public:
ST_A,
ST_L,
ST_S,
- LDG, // Load from global memory
- STG, // Store in global memory
+ LDG, // Load from global memory
+ STG, // Store in global memory
+ AL2P, // Transforms attribute memory into physical memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
@@ -1646,6 +1663,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+ INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cdf86f562..05a168a72 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -81,12 +81,6 @@ struct CommandDataContainer {
CommandDataContainer(CommandData&& data, u64 next_fence)
: data{std::move(data)}, fence{next_fence} {}
- CommandDataContainer& operator=(const CommandDataContainer& t) {
- data = std::move(t.data);
- fence = t.fence;
- return *this;
- }
-
CommandData data;
u64 fence{};
};
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index fbea107ca..c766ed692 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -120,7 +120,9 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
// An instruction with the Exit flag will not actually
// cause an exit if it's executed inside a delay slot.
- if (opcode.is_exit && !is_delay_slot) {
+ // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
+ // testing on the MME code.
+ if (opcode.is_exit) {
// Exit has a delay slot, execute the next instruction
Step(offset, true);
return false;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index f820f3ed9..0c4ea1494 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -144,8 +144,9 @@ protected:
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
+ const CacheAddr addr = object->GetCacheAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
- map_cache.erase(object->GetCacheAddr());
+ map_cache.erase(addr);
}
/// Returns a ticks counter used for tracking when cached objects were last modified
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b6d9e0ddb..38497678a 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,18 @@ T GetInteger(GLenum pname) {
Device::Device() {
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
+ max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
+ max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_variable_aoffi = TestVariableAoffi();
}
+Device::Device(std::nullptr_t) {
+ uniform_buffer_alignment = 0;
+ max_vertex_attributes = 16;
+ max_varyings = 15;
+ has_variable_aoffi = true;
+}
+
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
uniform sampler2D tex;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 78ff5ee58..de8490682 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -5,17 +5,27 @@
#pragma once
#include <cstddef>
+#include "common/common_types.h"
namespace OpenGL {
class Device {
public:
- Device();
+ explicit Device();
+ explicit Device(std::nullptr_t);
std::size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
+ u32 GetMaxVertexAttributes() const {
+ return max_vertex_attributes;
+ }
+
+ u32 GetMaxVaryings() const {
+ return max_varyings;
+ }
+
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
@@ -24,6 +34,8 @@ private:
static bool TestVariableAoffi();
std::size_t uniform_buffer_alignment{};
+ u32 max_vertex_attributes{};
+ u32 max_varyings{};
bool has_variable_aoffi{};
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1a62795e1..4bff54a59 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -134,6 +134,19 @@ bool IsPrecise(Node node) {
return false;
}
+constexpr bool IsGenericAttribute(Attribute::Index index) {
+ return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
+}
+
+constexpr Attribute::Index ToGenericAttribute(u32 value) {
+ return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0));
+}
+
+u32 GetGenericAttributeIndex(Attribute::Index index) {
+ ASSERT(IsGenericAttribute(index));
+ return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
@@ -152,6 +165,7 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
+ DeclarePhysicalAttributeReader();
code.AddLine("void execute_" + suffix + "() {");
++code.scope;
@@ -296,76 +310,95 @@ private:
}
std::string GetInputFlags(AttributeUse attribute) {
- std::string out;
-
switch (attribute) {
- case AttributeUse::Constant:
- out += "flat ";
- break;
- case AttributeUse::ScreenLinear:
- out += "noperspective ";
- break;
case AttributeUse::Perspective:
// Default, Smooth
- break;
+ return {};
+ case AttributeUse::Constant:
+ return "flat ";
+ case AttributeUse::ScreenLinear:
+ return "noperspective ";
default:
- LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
- UNREACHABLE();
+ case AttributeUse::Unused:
+ UNREACHABLE_MSG("Unused attribute being fetched");
+ return {};
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
+ return {};
}
- return out;
}
void DeclareInputAttributes() {
- const auto& attributes = ir.GetInputAttributes();
- for (const auto element : attributes) {
- const Attribute::Index index = element.first;
- if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
- // Skip when it's not a generic attribute
- continue;
- }
-
- // TODO(bunnei): Use proper number of elements for these
- u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
- if (stage != ShaderStage::Vertex) {
- // If inputs are varyings, add an offset
- idx += GENERIC_VARYING_START_LOCATION;
+ if (ir.HasPhysicalAttributes()) {
+ const u32 num_inputs{GetNumPhysicalInputAttributes()};
+ for (u32 i = 0; i < num_inputs; ++i) {
+ DeclareInputAttribute(ToGenericAttribute(i), true);
}
+ code.AddNewLine();
+ return;
+ }
- std::string attr = GetInputAttribute(index);
- if (stage == ShaderStage::Geometry) {
- attr = "gs_" + attr + "[]";
- }
- std::string suffix;
- if (stage == ShaderStage::Fragment) {
- const auto input_mode =
- header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
- suffix = GetInputFlags(input_mode);
+ const auto& attributes = ir.GetInputAttributes();
+ for (const auto index : attributes) {
+ if (IsGenericAttribute(index)) {
+ DeclareInputAttribute(index, false);
}
- code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
- attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
}
+ void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
+ const u32 generic_index{GetGenericAttributeIndex(index)};
+
+ std::string name{GetInputAttribute(index)};
+ if (stage == ShaderStage::Geometry) {
+ name = "gs_" + name + "[]";
+ }
+
+ std::string suffix;
+ if (stage == ShaderStage::Fragment) {
+ const auto input_mode{header.ps.GetAttributeUse(generic_index)};
+ if (skip_unused && input_mode == AttributeUse::Unused) {
+ return;
+ }
+ suffix = GetInputFlags(input_mode);
+ }
+
+ u32 location = generic_index;
+ if (stage != ShaderStage::Vertex) {
+ // If inputs are varyings, add an offset
+ location += GENERIC_VARYING_START_LOCATION;
+ }
+
+ code.AddLine("layout (location = " + std::to_string(location) + ") " + suffix + "in vec4 " +
+ name + ';');
+ }
+
void DeclareOutputAttributes() {
+ if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) {
+ for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
+ DeclareOutputAttribute(ToGenericAttribute(i));
+ }
+ code.AddNewLine();
+ return;
+ }
+
const auto& attributes = ir.GetOutputAttributes();
for (const auto index : attributes) {
- if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
- // Skip when it's not a generic attribute
- continue;
+ if (IsGenericAttribute(index)) {
+ DeclareOutputAttribute(index);
}
- // TODO(bunnei): Use proper number of elements for these
- const auto idx = static_cast<u32>(index) -
- static_cast<u32>(Attribute::Index::Attribute_0) +
- GENERIC_VARYING_START_LOCATION;
- code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
- GetOutputAttribute(index) + ';');
}
if (!attributes.empty())
code.AddNewLine();
}
+ void DeclareOutputAttribute(Attribute::Index index) {
+ const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
+ code.AddLine("layout (location = " + std::to_string(location) + ") out vec4 " +
+ GetOutputAttribute(index) + ';');
+ }
+
void DeclareConstantBuffers() {
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
@@ -429,6 +462,39 @@ private:
code.AddNewLine();
}
+ void DeclarePhysicalAttributeReader() {
+ if (!ir.HasPhysicalAttributes()) {
+ return;
+ }
+ code.AddLine("float readPhysicalAttribute(uint physical_address) {");
+ ++code.scope;
+ code.AddLine("switch (physical_address) {");
+
+ // Just declare generic attributes for now.
+ const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
+ for (u32 index = 0; index < num_attributes; ++index) {
+ const auto attribute{ToGenericAttribute(index)};
+ for (u32 element = 0; element < 4; ++element) {
+ constexpr u32 generic_base{0x80};
+ constexpr u32 generic_stride{16};
+ constexpr u32 element_stride{4};
+ const u32 address{generic_base + index * generic_stride + element * element_stride};
+
+ const bool declared{stage != ShaderStage::Fragment ||
+ header.ps.GetAttributeUse(index) != AttributeUse::Unused};
+ const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
+ code.AddLine(fmt::format("case 0x{:x}: return {};", address, value));
+ }
+ }
+
+ code.AddLine("default: return 0;");
+
+ code.AddLine('}');
+ --code.scope;
+ code.AddLine('}');
+ code.AddNewLine();
+ }
+
void VisitBlock(const NodeBlock& bb) {
for (const Node node : bb) {
if (const std::string expr = Visit(node); !expr.empty()) {
@@ -483,70 +549,12 @@ private:
return value;
} else if (const auto abuf = std::get_if<AbufNode>(node)) {
- const auto attribute = abuf->GetIndex();
- const auto element = abuf->GetElement();
-
- const auto GeometryPass = [&](const std::string& name) {
- if (stage == ShaderStage::Geometry && abuf->GetBuffer()) {
- // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
- // set an 0x80000000 index for those and the shader fails to build. Find out why
- // this happens and what's its intent.
- return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) +
- ") % MAX_VERTEX_INPUT]";
- }
- return name;
- };
-
- switch (attribute) {
- case Attribute::Index::Position:
- if (stage != ShaderStage::Fragment) {
- return GeometryPass("position") + GetSwizzle(element);
- } else {
- return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
- }
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return "gl_PointCoord.x";
- case 1:
- return "gl_PointCoord.y";
- case 2:
- case 3:
- return "0";
- }
- UNREACHABLE();
- return "0";
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what's the value of the fourth element when inside a Tess Eval
- // shader.
- ASSERT(stage == ShaderStage::Vertex);
- switch (element) {
- case 2:
- // Config pack's first value is instance_id.
- return "uintBitsToFloat(config_pack[0])";
- case 3:
- return "uintBitsToFloat(gl_VertexID)";
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return "0";
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderStage::Fragment);
- switch (element) {
- case 3:
- return "itof(gl_FrontFacing ? -1 : 0)";
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return "0";
- default:
- if (attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31) {
- return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
- }
- break;
+ UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
+ "Physical attributes in geometry shaders are not implemented");
+ if (abuf->IsPhysicalBuffer()) {
+ return "readPhysicalAttribute(ftou(" + Visit(abuf->GetPhysicalAddress()) + "))";
}
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
} else if (const auto cbuf = std::get_if<CbufNode>(node)) {
const Node offset = cbuf->GetOffset();
@@ -598,6 +606,69 @@ private:
return {};
}
+ std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) {
+ const auto GeometryPass = [&](std::string name) {
+ if (stage == ShaderStage::Geometry && buffer) {
+ // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
+ // set an 0x80000000 index for those and the shader fails to build. Find out why
+ // this happens and what's its intent.
+ return "gs_" + std::move(name) + "[ftou(" + Visit(buffer) + ") % MAX_VERTEX_INPUT]";
+ }
+ return name;
+ };
+
+ switch (attribute) {
+ case Attribute::Index::Position:
+ if (stage != ShaderStage::Fragment) {
+ return GeometryPass("position") + GetSwizzle(element);
+ } else {
+ return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
+ }
+ case Attribute::Index::PointCoord:
+ switch (element) {
+ case 0:
+ return "gl_PointCoord.x";
+ case 1:
+ return "gl_PointCoord.y";
+ case 2:
+ case 3:
+ return "0";
+ }
+ UNREACHABLE();
+ return "0";
+ case Attribute::Index::TessCoordInstanceIDVertexID:
+ // TODO(Subv): Find out what the values are for the first two elements when inside a
+ // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+ // shader.
+ ASSERT(stage == ShaderStage::Vertex);
+ switch (element) {
+ case 2:
+ // Config pack's first value is instance_id.
+ return "uintBitsToFloat(config_pack[0])";
+ case 3:
+ return "uintBitsToFloat(gl_VertexID)";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+ return "0";
+ case Attribute::Index::FrontFacing:
+ // TODO(Subv): Find out what the values are for the other elements.
+ ASSERT(stage == ShaderStage::Fragment);
+ switch (element) {
+ case 3:
+ return "itof(gl_FrontFacing ? -1 : 0)";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
+ return "0";
+ default:
+ if (IsGenericAttribute(attribute)) {
+ return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
+ }
+ break;
+ }
+ UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ return "0";
+ }
+
std::string ApplyPrecise(Operation operation, const std::string& value) {
if (!IsPrecise(operation)) {
return value;
@@ -833,6 +904,8 @@ private:
target = GetRegister(gpr->GetIndex());
} else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+ UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
+
target = [&]() -> std::string {
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
case Attribute::Index::Position:
@@ -844,8 +917,7 @@ private:
case Attribute::Index::ClipDistances4567:
return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']';
default:
- if (attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31) {
+ if (IsGenericAttribute(attribute)) {
return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
@@ -1591,15 +1663,11 @@ private:
}
std::string GetInputAttribute(Attribute::Index attribute) const {
- const auto index{static_cast<u32>(attribute) -
- static_cast<u32>(Attribute::Index::Attribute_0)};
- return GetDeclarationWithSuffix(index, "input_attr");
+ return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
}
std::string GetOutputAttribute(Attribute::Index attribute) const {
- const auto index{static_cast<u32>(attribute) -
- static_cast<u32>(Attribute::Index::Attribute_0)};
- return GetDeclarationWithSuffix(index, "output_attr");
+ return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
}
std::string GetConstBuffer(u32 index) const {
@@ -1640,6 +1708,19 @@ private:
return name + '_' + std::to_string(index) + '_' + suffix;
}
+ u32 GetNumPhysicalInputAttributes() const {
+ return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
+ }
+
+ u32 GetNumPhysicalAttributes() const {
+ return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
+ }
+
+ u32 GetNumPhysicalVaryings() const {
+ return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION,
+ Maxwell::NumVaryings);
+ }
+
const Device& device;
const ShaderIR& ir;
const ShaderStage stage;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 254c0d499..fba9c594a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
- : system{system}, precompiled_cache_virtual_file_offset{0} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+
+ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- const auto entry = LoadDecompiledEntry();
+ auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
@@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- std::vector<u8> code(code_size);
+ std::string code(code_size, '\0');
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
ShaderDiskCacheDecompiled entry;
- entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+ entry.code = std::move(code);
u32 const_buffers_count{};
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
@@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
- u8 is_indirect{};
+ bool is_indirect{};
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
- entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+ entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
}
u32 samplers_count{};
@@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u64 offset{};
u64 index{};
u32 type{};
- u8 is_array{};
- u8 is_shadow{};
- u8 is_bindless{};
+ bool is_array{};
+ bool is_shadow{};
+ bool is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
- entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
- static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::TextureType>(type),
- is_array != 0, is_shadow != 0, is_bindless != 0);
+ entry.entries.samplers.emplace_back(
+ static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+ static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
}
u32 global_memory_count{};
@@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
- u8 is_read{};
- u8 is_written{};
+ bool is_read{};
+ bool is_written{};
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
- entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
- is_written != 0);
+ entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
+ is_written);
}
for (auto& clip_distance : entry.entries.clip_distances) {
- u8 clip_distance_raw{};
- if (!LoadObjectFromPrecompiled(clip_distance_raw))
+ if (!LoadObjectFromPrecompiled(clip_distance)) {
return {};
- clip_distance = clip_distance_raw != 0;
+ }
}
u64 shader_length{};
@@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& cbuf : entries.const_buffers) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
return false;
}
}
@@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(sampler.IsArray()) ||
+ !SaveObjectToPrecompiled(sampler.IsShadow()) ||
+ !SaveObjectToPrecompiled(sampler.IsBindless())) {
return false;
}
}
@@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& gmem : entries.global_memory_entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
- if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
+ if (!SaveObjectToPrecompiled(clip_distance)) {
return false;
}
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 0142b2e3b..2da0a4a23 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -70,14 +70,14 @@ namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
- std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+ std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
- std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+ std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
}
@@ -162,6 +162,7 @@ struct ShaderDiskCacheDump {
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL(Core::System& system);
+ ~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -259,20 +260,35 @@ private:
return SaveArrayToPrecompiled(&object, 1);
}
+ bool SaveObjectToPrecompiled(bool object) {
+ const auto value = static_cast<u8>(object);
+ return SaveArrayToPrecompiled(&value, 1);
+ }
+
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
- // Copre system
+ bool LoadObjectFromPrecompiled(bool& object) {
+ u8 value;
+ const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
+ if (!read_ok) {
+ return false;
+ }
+
+ object = value != 0;
+ return true;
+ }
+
+ // Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
- // Stores whole precompiled cache which will be read from or saved to the precompiled chache
- // file
+ // Stores whole precompiled cache which will be read from/saved to the precompiled cache file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset;
+ std::size_t precompiled_cache_virtual_file_offset = 0;
// The cache has been loaded at boot
bool tried_to_load{};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a11000f6b..b61a6d170 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -194,8 +194,8 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
- for (const auto& attr : ir.GetInputAttributes()) {
- entries.attributes.insert(GetGenericAttributeLocation(attr.first));
+ for (const auto& attribute : ir.GetInputAttributes()) {
+ entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -321,8 +321,7 @@ private:
}
void DeclareInputAttributes() {
- for (const auto element : ir.GetInputAttributes()) {
- const Attribute::Index index = element.first;
+ for (const auto index : ir.GetInputAttributes()) {
if (!IsGenericAttribute(index)) {
continue;
}
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea1092db1..6a992c543 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -12,6 +12,8 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
+#pragma optimize("", off)
+
namespace VideoCommon::Shader {
using Tegra::Shader::Attribute;
@@ -47,17 +49,20 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
+ UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
+ instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
+ "Non-32 bits PHYS reads are not implemented");
- Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
- Tegra::Shader::IpaSampleMode::Default};
+ const Node buffer{GetRegister(instr.gpr39)};
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
- const Node buffer = GetRegister(instr.gpr39);
- const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
- next_element, input_mode, buffer);
+ const Node attribute{instr.attribute.fmt20.IsPhysical()
+ ? GetPhysicalInputAttribute(instr.gpr8, buffer)
+ : GetInputAttribute(static_cast<Attribute::Index>(next_index),
+ next_element, buffer)};
SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
@@ -239,6 +244,21 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::AL2P: {
+ // Ignore al2p.direction since we don't care about it.
+
+ // Calculate emulation fake physical address.
+ const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
+ const Node reg{GetRegister(instr.gpr8)};
+ const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
+
+ // Set the fake address to target register.
+ SetRegister(bb, instr.gpr0, fake_address);
+
+ // Signal the shader IR to declare all possible attributes and varyings
+ uses_physical_attributes = true;
+ break;
+ }
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d750a2936..fa17c45b5 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -130,15 +130,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::IPA: {
- const auto& attribute = instr.attribute.fmt28;
+ const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
+
+ const auto attribute = instr.attribute.fmt28;
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
instr.ipa.sample_mode.Value()};
- const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
- Node value = attr;
+ Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
+ : GetInputAttribute(attribute.index, attribute.element);
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
- if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
- index <= Tegra::Shader::Attribute::Index::Attribute_31) {
+ const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
+ index <= Tegra::Shader::Attribute::Index::Attribute_31;
+ if (is_generic || is_physical) {
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must figured to reverse the last step of it.
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index e4eb0dfd9..153ad1fd0 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -21,6 +21,13 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
+ : program_code{program_code}, main_offset{main_offset} {
+ Decode();
+}
+
+ShaderIR::~ShaderIR() = default;
+
Node ShaderIR::StoreNode(NodeData&& node_data) {
auto store = std::make_unique<NodeData>(node_data);
const Node node = store.get();
@@ -89,13 +96,14 @@ Node ShaderIR::GetPredicate(bool immediate) {
return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
}
-Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
- const Tegra::Shader::IpaMode& input_mode, Node buffer) {
- const auto [entry, is_new] =
- used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
- entry->second.insert(input_mode);
+Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
+ used_input_attributes.emplace(index);
+ return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
+}
- return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
+Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
+ uses_physical_attributes = true;
+ return StoreNode(AbufNode(GetRegister(physical_address), buffer));
}
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 65f1e1de9..0bf124252 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -328,40 +328,31 @@ struct MetaTexture {
u32 element{};
};
-inline constexpr MetaArithmetic PRECISE = {true};
-inline constexpr MetaArithmetic NO_PRECISE = {false};
+constexpr MetaArithmetic PRECISE = {true};
+constexpr MetaArithmetic NO_PRECISE = {false};
using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
public:
- template <typename... T>
- explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
+ explicit OperationNode(OperationCode code) : code{code} {}
- template <typename... T>
- explicit constexpr OperationNode(OperationCode code, Meta&& meta)
- : code{code}, meta{std::move(meta)} {}
+ explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
template <typename... T>
- explicit constexpr OperationNode(OperationCode code, const T*... operands)
+ explicit OperationNode(OperationCode code, const T*... operands)
: OperationNode(code, {}, operands...) {}
template <typename... T>
- explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
- : code{code}, meta{std::move(meta)} {
-
- auto operands_list = {operands_...};
- for (auto& operand : operands_list) {
- operands.push_back(operand);
- }
- }
+ explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
+ : code{code}, meta{std::move(meta)}, operands{operands_...} {}
explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
: code{code}, meta{meta}, operands{std::move(operands)} {}
explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
- : code{code}, meta{}, operands{std::move(operands)} {}
+ : code{code}, operands{std::move(operands)} {}
OperationCode GetCode() const {
return code;
@@ -465,17 +456,14 @@ private:
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
class AbufNode final {
public:
- explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
- const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
- : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}
-
+ // Initialize for standard attributes (index is explicit).
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
Node buffer = {})
- : input_mode{}, buffer{buffer}, index{index}, element{element} {}
+ : buffer{buffer}, index{index}, element{element} {}
- Tegra::Shader::IpaMode GetInputMode() const {
- return input_mode;
- }
+ // Initialize for physical attributes (index is a variable value).
+ explicit constexpr AbufNode(Node physical_address, Node buffer = {})
+ : physical_address{physical_address}, buffer{buffer} {}
Tegra::Shader::Attribute::Index GetIndex() const {
return index;
@@ -489,11 +477,19 @@ public:
return buffer;
}
+ bool IsPhysicalBuffer() const {
+ return physical_address != nullptr;
+ }
+
+ Node GetPhysicalAddress() const {
+ return physical_address;
+ }
+
private:
- const Tegra::Shader::IpaMode input_mode;
- const Node buffer;
- const Tegra::Shader::Attribute::Index index;
- const u32 element;
+ Node physical_address{};
+ Node buffer{};
+ Tegra::Shader::Attribute::Index index{};
+ u32 element{};
};
/// Constant buffer node, usually mapped to uniform buffers in GLSL
@@ -567,11 +563,8 @@ private:
class ShaderIR final {
public:
- explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
- : program_code{program_code}, main_offset{main_offset} {
-
- Decode();
- }
+ explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
+ ~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
return basic_blocks;
@@ -585,8 +578,7 @@ public:
return used_predicates;
}
- const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
- GetInputAttributes() const {
+ const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
return used_input_attributes;
}
@@ -615,6 +607,10 @@ public:
return static_cast<std::size_t>(coverage_end * sizeof(u64));
}
+ bool HasPhysicalAttributes() const {
+ return uses_physical_attributes;
+ }
+
const Tegra::Shader::Header& GetHeader() const {
return header;
}
@@ -696,8 +692,9 @@ private:
/// Generates a predicate node for an immediate true or false value
Node GetPredicate(bool immediate);
/// Generates a node representing an input attribute. Keeps track of used attributes.
- Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
- const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
+ Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
+ /// Generates a node representing a physical input attribute.
+ Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
/// Generates a node representing an output attribute. Keeps track of used attributes.
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
/// Generates a node representing an internal flag
@@ -814,11 +811,12 @@ private:
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
Node op_c, Node imm_lut, bool sets_cc);
- Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
+ Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+ std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
+ std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
+ s64 cursor) const;
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
Node addr_register,
@@ -835,12 +833,10 @@ private:
return StoreNode(OperationNode(code, std::move(meta), operands...));
}
- template <typename... T>
Node Operation(OperationCode code, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(operands)));
}
- template <typename... T>
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
}
@@ -872,13 +868,13 @@ private:
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
- std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
- used_input_attributes;
+ std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
+ bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
Tegra::Shader::Header header;
};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 4505667ff..19ede1eb9 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -17,22 +17,24 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
for (; cursor >= 0; --cursor) {
const Node node = code.at(cursor);
if (const auto operation = std::get_if<OperationNode>(node)) {
- if (operation->GetCode() == operation_code)
+ if (operation->GetCode() == operation_code) {
return {node, cursor};
+ }
}
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
const auto& conditional_code = conditional->GetCode();
const auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
- if (found)
+ if (found) {
return {found, cursor};
+ }
}
}
return {};
}
} // namespace
-Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
+Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
// Cbuf found, but it has to be immediate
return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -65,7 +67,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
// Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
// that it uses as operand
const auto [found, found_cursor] =
@@ -80,7 +82,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
- s64 cursor) {
+ s64 cursor) const {
for (; cursor >= 0; --cursor) {
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
if (!found_node) {