From e2a2a556b9713f7c2e8dc20dbdaff80996fa6b91 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 9 Jan 2020 01:08:55 -0300 Subject: shader_ir/memory: Implement u16 and u8 for STG and LDG Using the same technique we used for u8 on LDG, implement u16. In the case of STG, load memory and insert the value we want to set into it with bitfieldInsert. Then set that value. --- src/video_core/shader/decode/memory.cpp | 84 ++++++++++++++++++++------------- src/video_core/shader/shader_ir.h | 2 +- 2 files changed, 52 insertions(+), 34 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index c934d0719..8cc84e935 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -6,6 +6,7 @@ #include #include +#include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" @@ -22,34 +23,39 @@ using Tegra::Shader::Register; namespace { -u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { +bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { + return uniform_type == Tegra::Shader::UniformType::UnsignedByte || + uniform_type == Tegra::Shader::UniformType::UnsignedShort; +} + +u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { case Tegra::Shader::UniformType::UnsignedByte: - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 0b11; + case Tegra::Shader::UniformType::UnsignedShort: + return 0b10; default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast(uniform_type)); - return 1; + UNREACHABLE(); + return 0; } } -u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { +u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { + case Tegra::Shader::UniformType::UnsignedByte: + return 8; + case Tegra::Shader::UniformType::UnsignedShort: + return 16; case Tegra::Shader::UniformType::Single: - return 1; + return 32; case Tegra::Shader::UniformType::Double: - return 2; + return 64; case Tegra::Shader::UniformType::Quad: case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 128; default: UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast(uniform_type)); - return 1; + return 32; } } @@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, true, false); - const u32 count = GetLdgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; if (!real_address_base || !base_address) { // Tracking failed, load zeroes. for (u32 i = 0; i < count; ++i) { @@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); Node gmem = MakeNode(real_address, base_address, descriptor); - if (type == Tegra::Shader::UniformType::UnsignedByte) { - // To handle unaligned loads get the byte used to dereferenced global memory - // and extract that byte from the loaded uint32. - Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); - byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); + // To handle unaligned loads get the bytes used to dereference global memory and extract + // those bytes from the loaded u32. + if (IsUnaligned(type)) { + Node mask = Immediate(GetUnalignedMask(type)); + Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), - Immediate(8)); + gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), + std::move(offset), Immediate(size)); } SetTemporary(bb, i, gmem); @@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } }(); + // For unaligned reads we have to read memory too. + const bool is_read = IsUnaligned(type); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true); + TrackGlobalMemory(bb, instr, is_read, true); if (!real_address_base || !base_address) { // Tracking failed, skip the store. break; } - const u32 count = GetStgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode(real_address, base_address, descriptor); - const Node value = GetRegister(instr.gpr0.Value() + i); + Node value = GetRegister(instr.gpr0.Value() + i); + + if (IsUnaligned(type)) { + Node mask = Immediate(GetUnalignedMask(type)); + Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); + + value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, + Immediate(size)); + } + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; @@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { std::tuple ShaderIR::TrackGlobalMemory(NodeBlock& bb, Instruction instr, - bool is_write) { + bool is_read, bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast(instr.gmem.offset)}; @@ -351,11 +372,8 @@ std::tuple ShaderIR::TrackGlobalMemory(NodeBlock& const GlobalMemoryBase descriptor{index, offset}; const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); auto& usage = entry->second; - if (is_write) { - usage.is_written = true; - } else { - usage.is_read = true; - } + usage.is_written |= is_write; + usage.is_read |= is_read; const auto real_address = Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index aacd0a0da..ba1db4c11 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -394,7 +394,7 @@ private: std::tuple TrackGlobalMemory(NodeBlock& bb, Tegra::Shader::Instruction instr, - bool is_write); + bool is_read, bool is_write); /// Register new amending code and obtain the reference id. std::size_t DeclareAmend(Node new_amend); -- cgit v1.2.3