From e5ca097e32d9ec03b1ba4b5e44e3b6553e3addd4 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 28 Oct 2018 11:38:40 -0400 Subject: Refactor precise usage and add FMNMX, MUFU, FMUL32 and FADD332 --- .../renderer_opengl/gl_shader_decompiler.cpp | 102 +++++++-------------- src/video_core/renderer_opengl/gl_shader_gen.cpp | 9 +- 2 files changed, 37 insertions(+), 74 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 002ae90a7..dcf6941b0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -341,10 +341,10 @@ public: */ void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, u64 dest_num_components, u64 value_num_components, - bool is_saturated = false, u64 dest_elem = 0) { + bool is_saturated = false, u64 dest_elem = 0, bool precise = false) { SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, - dest_num_components, value_num_components, dest_elem); + dest_num_components, value_num_components, dest_elem, precise); } /** @@ -368,7 +368,7 @@ public: const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', - dest_num_components, value_num_components, dest_elem); + dest_num_components, value_num_components, dest_elem, false); if (sets_cc) { const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; @@ -416,7 +416,7 @@ public: } }(); - SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); + SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); } /** @@ -757,7 +757,8 @@ private: * @param dest_elem Optional, the destination element to use for the operation. */ void SetRegister(const Register& reg, u64 elem, const std::string& value, - u64 dest_num_components, u64 value_num_components, u64 dest_elem) { + u64 dest_num_components, u64 value_num_components, u64 dest_elem, + bool precise) { if (reg == Register::ZeroIndex) { LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); UNREACHABLE(); @@ -774,7 +775,18 @@ private: src += GetSwizzle(elem); } - shader.AddLine(dest + " = " + src + ';'); + if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { + shader.AddLine('{'); + ++shader.scope; + // This avoids optimizations of constant propagation and keeps the code as the original + // Sadly using the precise keyword causes "linking" errors on fragment shaders. + shader.AddLine("precise float tmp = " + src + ';'); + shader.AddLine(dest + " = tmp;"); + --shader.scope; + shader.AddLine('}'); + } else { + shader.AddLine(dest + " = " + src + ';'); + } } /// Build the GLSL register list. @@ -1511,24 +1523,8 @@ private: op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); - shader.AddLine('{'); - ++shader.scope; - - // This avoids optimizations of constant propagation and keeps the code as the original - // Sadly using the precise keyword causes "linking" errors on fragment shaders. - if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine("float tmp = " + op_a + " * " + op_b + ';'); - } else { - shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';'); - } - - - regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, - instr.alu.saturate_d); - - - --shader.scope; - shader.AddLine('}'); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, + instr.alu.saturate_d, 0, true); break; } case OpCode::Id::FADD_C: @@ -1537,24 +1533,8 @@ private: op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); - shader.AddLine('{'); - ++shader.scope; - - // This avoids optimizations of constant propagation and keeps the code as the original - // Sadly using the precise keyword causes "linking" errors on fragment shaders. - if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine("float tmp = " + op_a + " + " + op_b + ';'); - } else { - shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';'); - } - regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, - instr.alu.saturate_d); - - - --shader.scope; - shader.AddLine('}'); - - + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, + instr.alu.saturate_d, 0, true); break; } case OpCode::Id::MUFU: { @@ -1562,31 +1542,31 @@ private: switch (instr.sub_op) { case SubOp::Cos: regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Sin: regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Ex2: regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Lg2: regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Rcp: regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Rsq: regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; case SubOp::Sqrt: regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, 0, true); break; default: LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", @@ -1607,7 +1587,7 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, '(' + condition + ") ? min(" + parameters + ") : max(" + parameters + ')', - 1, 1); + 1, 1, false, 0, true); break; } case OpCode::Id::RRO_C: @@ -1636,7 +1616,7 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), - 1, 1, instr.fmul32.saturate); + 1, 1, instr.fmul32.saturate, 0, true); break; } case OpCode::Id::FADD32I: { @@ -1659,7 +1639,7 @@ private: op_b = "-(" + op_b + ')'; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); break; } } @@ -2121,23 +2101,9 @@ private: } } - shader.AddLine('{'); - ++shader.scope; + regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', + 1, 1, instr.alu.saturate_d, 0, true); - // This avoids optimizations of constant propagation and keeps the code as the original - // Sadly using the precise keyword causes "linking" errors on fragment shaders. - if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); - } else { - shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); - } - - regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, - instr.alu.saturate_d); - - - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Type::Hfma2: { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 08dd8dc6c..dfb562706 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -15,8 +15,7 @@ static constexpr u32 PROGRAM_OFFSET{10}; ProgramResult GenerateVertexShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; - out += "#extension GL_ARB_separate_shader_objects : enable\n"; - out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); out += R"( @@ -84,8 +83,7 @@ void main() { ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; - out += "#extension GL_ARB_separate_shader_objects : enable\n"; - out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); out += "bool exec_geometry();\n"; @@ -119,8 +117,7 @@ void main() { ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; - out += "#extension GL_ARB_separate_shader_objects : enable\n"; - out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); out += "bool exec_fragment();\n"; -- cgit v1.2.3