summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--appveyor.yml2
-rw-r--r--src/video_core/engines/shader_bytecode.h31
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp86
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h2
-rw-r--r--src/yuzu/main.cpp2
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp2
6 files changed, 104 insertions, 21 deletions
diff --git a/appveyor.yml b/appveyor.yml
index a6f12b267..436d98fa1 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -53,7 +53,7 @@ build_script:
# https://www.appveyor.com/docs/build-phase
msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
} else {
- C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
+ C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -j4 -C mingw_build/ 2>&1'
}
after_build:
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index b038a9d92..096de9632 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -12,6 +12,7 @@
#include <boost/optional.hpp>
+#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -79,6 +80,9 @@ union Attribute {
// shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
// shader.
TessCoordInstanceIDVertexID = 47,
+ // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
+ // shader. It is unknown what the other values contain.
+ FrontFacing = 63,
};
union {
@@ -214,6 +218,11 @@ enum class FlowCondition : u64 {
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
};
+enum class PredicateResultMode : u64 {
+ None = 0x0,
+ NotZero = 0x3,
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -254,7 +263,7 @@ union Instruction {
BitField<39, 1, u64> invert_a;
BitField<40, 1, u64> invert_b;
BitField<41, 2, LogicOperation> operation;
- BitField<44, 2, u64> unk44;
+ BitField<44, 2, PredicateResultMode> pred_result_mode;
BitField<48, 3, Pred> pred48;
} lop;
@@ -438,16 +447,20 @@ union Instruction {
}
bool IsComponentEnabled(size_t component) const {
- static constexpr std::array<std::array<u32, 8>, 4> mask_lut{
- {{},
- {0x1, 0x2, 0x4, 0x8, 0x3},
- {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
- {0x7, 0xb, 0xd, 0xe, 0xf}}};
+ static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
+ {},
+ {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+ {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+ {0x7, 0xb, 0xd, 0xe, 0xf},
+ }};
size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
- return ((1ull << component) & mask_lut[index][component_mask_selector]) != 0;
+ u32 mask = mask_lut[index][component_mask_selector];
+ // A mask of 0 means this instruction uses an unimplemented mask.
+ ASSERT(mask != 0);
+ return ((1ull << component) & mask) != 0;
}
} texs;
@@ -513,6 +526,8 @@ public:
LD_A,
LD_C,
ST_A,
+ LDG, // Load from global memory
+ STG, // Store in global memory
TEX,
TEXQ, // Texture Query
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
@@ -724,6 +739,8 @@ private:
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+ INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
+ INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bad1849b8..44c6120b8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -542,6 +542,10 @@ private:
// shader.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))";
+ case Attribute::Index::FrontFacing:
+ // TODO(Subv): Find out what the values are for the other elements.
+ ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
+ return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -756,28 +760,51 @@ private:
}
void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
- const std::string& op_b) {
+ const std::string& op_b,
+ Tegra::Shader::PredicateResultMode predicate_mode,
+ Tegra::Shader::Pred predicate) {
+ std::string result{};
switch (logic_op) {
case LogicOperation::And: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+ result = '(' + op_a + " & " + op_b + ')';
break;
}
case LogicOperation::Or: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+ result = '(' + op_a + " | " + op_b + ')';
break;
}
case LogicOperation::Xor: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+ result = '(' + op_a + " ^ " + op_b + ')';
break;
}
case LogicOperation::PassB: {
- regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+ result = op_b;
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
UNREACHABLE();
}
+
+ if (dest != Tegra::Shader::Register::ZeroIndex) {
+ regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+ }
+
+ using Tegra::Shader::PredicateResultMode;
+ // Write the predicate value depending on the predicate mode.
+ switch (predicate_mode) {
+ case PredicateResultMode::None:
+ // Do nothing.
+ return;
+ case PredicateResultMode::NotZero:
+ // Set the predicate to true if the result is not zero.
+ SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}",
+ static_cast<u32>(predicate_mode));
+ UNREACHABLE();
+ }
}
void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
@@ -815,6 +842,33 @@ private:
shader.AddLine('}');
}
+ /*
+ * Emits code to push the input target address to the SSY address stack, incrementing the stack
+ * top.
+ */
+ void EmitPushToSSYStack(u32 target) {
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine("ssy_stack[ssy_stack_top] = " + std::to_string(target) + "u;");
+ shader.AddLine("ssy_stack_top++;");
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
+ /*
+ * Emits code to pop an address from the SSY address stack, setting the jump address to the
+ * popped address and decrementing the stack top.
+ */
+ void EmitPopFromSSYStack() {
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine("ssy_stack_top--;");
+ shader.AddLine("jmp_to = ssy_stack[ssy_stack_top];");
+ shader.AddLine("break;");
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
/**
* Compiles a single instruction from Tegra to GLSL.
* @param offset the offset of the Tegra shader instruction.
@@ -1099,7 +1153,9 @@ private:
if (instr.alu.lop32i.invert_b)
op_b = "~(" + op_b + ')';
- WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+ WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
+ Tegra::Shader::PredicateResultMode::None,
+ Tegra::Shader::Pred::UnusedIndex);
break;
}
default: {
@@ -1165,16 +1221,14 @@ private:
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
- ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
- ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
-
if (instr.alu.lop.invert_a)
op_a = "~(" + op_a + ')';
if (instr.alu.lop.invert_b)
op_b = "~(" + op_b + ')';
- WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+ WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+ instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
break;
}
case OpCode::Id::IMNMX_C:
@@ -1841,13 +1895,13 @@ private:
ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
u32 target = offset + instr.bra.GetBranchTarget();
- shader.AddLine("ssy_target = " + std::to_string(target) + "u;");
+ EmitPushToSSYStack(target);
break;
}
case OpCode::Id::SYNC: {
// The SYNC opcode jumps to the address previously set by the SSY opcode
ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
- shader.AddLine("{ jmp_to = ssy_target; break; }");
+ EmitPopFromSSYStack();
break;
}
case OpCode::Id::DEPBAR: {
@@ -1918,7 +1972,13 @@ private:
} else {
labels.insert(subroutine.begin);
shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
- shader.AddLine("uint ssy_target = 0u;");
+
+ // TODO(Subv): Figure out the actual depth of the SSY stack, for now it seems
+ // unlikely that shaders will use 20 nested SSYs.
+ constexpr u32 SSY_STACK_SIZE = 20;
+ shader.AddLine("uint ssy_stack[" + std::to_string(SSY_STACK_SIZE) + "];");
+ shader.AddLine("uint ssy_stack_top = 0u;");
+
shader.AddLine("while (true) {");
++shader.scope;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 8f719fdd8..5d91a0c2f 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -147,6 +147,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
// GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.
return GL_CLAMP_TO_BORDER;
+ case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+ return GL_MIRROR_CLAMP_TO_EDGE;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
UNREACHABLE();
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 2df65023a..11d2331df 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -377,6 +377,8 @@ bool GMainWindow::SupportsRequiredGLExtensions() {
unsupported_ext.append("ARB_vertex_attrib_binding");
if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)
unsupported_ext.append("ARB_vertex_type_10f_11f_11f_rev");
+ if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge)
+ unsupported_ext.append("ARB_texture_mirror_clamp_to_edge");
// Extensions required to support some texture formats.
if (!GLAD_GL_EXT_texture_compression_s3tc)
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index e2945b6cf..351dd9225 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -89,6 +89,8 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
unsupported_ext.push_back("ARB_vertex_attrib_binding");
if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)
unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev");
+ if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge)
+ unsupported_ext.push_back("ARB_texture_mirror_clamp_to_edge");
// Extensions required to support some texture formats.
if (!GLAD_GL_EXT_texture_compression_s3tc)