From 2faad9bf23dbcedc80dca7ed9ad4b81c0416dd5e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Dec 2018 02:58:47 -0300 Subject: shader_decode: Use BitfieldExtract instead of shift + and --- .../shader/decode/arithmetic_integer.cpp | 5 ++--- src/video_core/shader/decode/bfi.cpp | 9 ++------ .../shader/decode/register_set_predicate.cpp | 12 +++++----- src/video_core/shader/decode/video.cpp | 12 +++------- src/video_core/shader/decode/xmad.cpp | 26 +++++----------------- src/video_core/shader/shader_ir.cpp | 5 +++++ src/video_core/shader/shader_ir.h | 9 ++++++-- 7 files changed, 30 insertions(+), 48 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 271ce205b..931e0fa1d 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -57,10 +57,9 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { case IAdd3Height::None: return value; case IAdd3Height::LowerHalfWord: - return Operation(OperationCode::IBitwiseAnd, NO_PRECISE, value, Immediate(0xffff)); + return BitfieldExtract(value, 0, 16); case IAdd3Height::UpperHalfWord: - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, value, - Immediate(16)); + return BitfieldExtract(value, 16, 16); default: UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast(height)); return Immediate(0); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index a750aca30..b0d8d9eba 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -28,13 +28,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { } }(); const Node insert = GetRegister(instr.gpr8); - - const Node offset = - Operation(OperationCode::UBitwiseAnd, NO_PRECISE, packed_shift, Immediate(0xff)); - - Node bits = - Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, 
packed_shift, Immediate(8)); - bits = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, bits, Immediate(0xff)); + const Node offset = BitfieldExtract(packed_shift, 0, 8); + const Node bits = BitfieldExtract(packed_shift, 8, 8); const Node value = Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index 06a3c7539..14bce9fa4 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -27,20 +27,18 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { return Immediate(static_cast(instr.r2p.immediate_mask)); } }(); - const Node mask = - Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, GetRegister(instr.gpr8), - Immediate(static_cast(instr.r2p.byte))); + const Node mask = GetRegister(instr.gpr8); + const auto offset = static_cast(instr.r2p.byte) * 8; constexpr u32 programmable_preds = 7; for (u64 pred = 0; pred < programmable_preds; ++pred) { - const Node shift = Immediate(1u << static_cast(pred)); + const auto shift = static_cast(pred); - const Node apply_compare = - Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift); + const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); const Node condition = Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); - const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift); + const Node value_compare = BitfieldExtract(mask, offset + shift, 1); const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 9510896e4..b491fbadb 100644 --- a/src/video_core/shader/decode/video.cpp +++ 
b/src/video_core/shader/decode/video.cpp @@ -88,21 +88,15 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height) { if (!is_chunk) { - const auto offset = static_cast(byte_height * 8); - const Node shift = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, - op, Immediate(offset)); - return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, shift, - Immediate(0xff)); + return BitfieldExtract(op, static_cast(byte_height * 8), 8); } const Node zero = Immediate(0); switch (type) { case Tegra::Shader::VideoType::Size16_Low: - return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, op, - Immediate(0xffff)); + return BitfieldExtract(op, 0, 16); case Tegra::Shader::VideoType::Size16_High: - return SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, op, - Immediate(16)); + return BitfieldExtract(op, 16, 16); case Tegra::Shader::VideoType::Size32: // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 0466069ae..3e37aee4a 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -47,22 +47,10 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { return {false, Immediate(0), Immediate(0)}; }(); - if (instr.xmad.high_a) { - op_a = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_a, NO_PRECISE, op_a, - Immediate(16)); - } else { - op_a = SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, NO_PRECISE, op_a, - Immediate(0xffff)); - } + op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 
16 : 0, 16); const Node original_b = op_b; - if (instr.xmad.high_b) { - op_b = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_b, NO_PRECISE, op_a, - Immediate(16)); - } else { - op_b = SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, NO_PRECISE, op_b, - Immediate(0xffff)); - } + op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16); // TODO(Rodrigo): Use an appropiate sign for this operation Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); @@ -75,11 +63,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { case Tegra::Shader::XmadMode::None: return op_c; case Tegra::Shader::XmadMode::CLo: - return SignedOperation(OperationCode::IBitwiseAnd, is_signed_c, NO_PRECISE, op_c, - Immediate(0xffff)); + return BitfieldExtract(op_c, 0, 16); case Tegra::Shader::XmadMode::CHi: - return SignedOperation(OperationCode::ILogicalShiftRight, is_signed_c, NO_PRECISE, op_c, - Immediate(16)); + return BitfieldExtract(op_c, 16, 16); case Tegra::Shader::XmadMode::CBcc: { const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, NO_PRECISE, original_b, Immediate(16)); @@ -94,9 +80,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { // TODO(Rodrigo): Use an appropiate sign for this operation Node sum = Operation(OperationCode::IAdd, product, op_c); if (is_merge) { - const Node a = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, sum, Immediate(0xffff)); + const Node a = BitfieldExtract(sum, 0, 16); const Node b = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(0xffff)); + Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16)); sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index b07642517..d4e304b4e 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -348,6 +348,11 @@ void 
ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); } +Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { + return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), + Immediate(bits)); +} + /*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { if (is_signed) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 52c7c3180..75d13fa4d 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -88,6 +88,7 @@ enum class OperationCode { IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int IBitwiseNot, /// (MetaArithmetic, int a) -> int IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int + IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int IBitCount, /// (MetaArithmetic, int) -> int UAdd, /// (MetaArithmetic, uint a, uint b) -> uint @@ -104,8 +105,9 @@ enum class OperationCode { UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint + UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint + UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint + UBitCount, /// (MetaArithmetic, uint) -> uint HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 @@ -689,6 +691,9 @@ private: const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + /// Extracts a sequence of bits from a node + Node 
BitfieldExtract(Node value, u32 offset, u32 bits); + void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, Node texture); void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, Node texture); -- cgit v1.2.3