summary refs log tree commit diff stats
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- src/video_core/shader/decode/conversion.cpp 15
-rw-r--r-- src/video_core/shader/decode/memory.cpp 38
-rw-r--r-- src/video_core/shader/decode/texture.cpp 13
3 files changed, 54 insertions, 12 deletions
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 32facd6ba..0eeb75559 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
- Node value = [&]() {
+ Node value = [&] {
switch (opcode->get().GetId()) {
case OpCode::Id::I2F_R:
return GetRegister(instr.gpr20);
@@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Immediate(0);
}
}();
+
const bool input_signed = instr.conversion.is_input_signed;
+
+ if (instr.conversion.src_size == Register::Size::Byte) {
+ const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8;
+ if (offset > 0) {
+ value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+ std::move(value), Immediate(offset));
+ }
+ } else {
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
+ }
+
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 78e92f52e..c934d0719 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -22,7 +22,23 @@ using Tegra::Shader::Register;
namespace {
-u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+ switch (uniform_type) {
+ case Tegra::Shader::UniformType::UnsignedByte:
+ case Tegra::Shader::UniformType::Single:
+ return 1;
+ case Tegra::Shader::UniformType::Double:
+ return 2;
+ case Tegra::Shader::UniformType::Quad:
+ case Tegra::Shader::UniformType::UnsignedQuad:
+ return 4;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+ return 1;
+ }
+}
+
+u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::Single:
return 1;
@@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const auto [real_address_base, base_address, descriptor] =
TrackGlobalMemory(bb, instr, false);
- const u32 count = GetUniformTypeElementsCount(type);
+ const u32 count = GetLdgMemorySize(type);
if (!real_address_base || !base_address) {
// Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) {
@@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
- const Node real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
- const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+ const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
+ Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+
+ if (type == Tegra::Shader::UniformType::UnsignedByte) {
+ // To handle unaligned loads, get the byte used to dereference global memory
+ // and extract that byte from the loaded uint32.
+ Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
+ byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+
+ gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
+ Immediate(8));
+ }
SetTemporary(bb, i, gmem);
}
+
for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
@@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
- const u32 count = GetUniformTypeElementsCount(type);
+ const u32 count = GetStgMemorySize(type);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index dd8ff851e..4b14cdf58 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -751,13 +751,18 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used, it is always in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- // Fill empty entries from the guest sampler.
+ // Fill empty entries from the guest sampler
const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
if (type_coord_count != entry_coord_count) {
LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
- }
- for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
- coords.push_back(GetRegister(Register::ZeroIndex));
+
+ // When the size is higher we insert zeroes
+ for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
+ coords.push_back(GetRegister(Register::ZeroIndex));
+ }
+
+ // Then we ensure the size matches the number of entries (dropping unused values)
+ coords.resize(entry_coord_count);
}
Node4 values;