summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/shader_recompiler/backend/glsl/emit_context.cpp44
-rw-r--r--src/shader_recompiler/backend/glsl/emit_context.h2
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp2
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_special.cpp22
-rw-r--r--src/shader_recompiler/ir_opt/texture_pass.cpp11
-rw-r--r--src/shader_recompiler/profile.h2
-rw-r--r--src/shader_recompiler/shader_info.h2
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
11 files changed, 86 insertions, 38 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 0e8fe017d..d224c4d84 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) {
}
}
+std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
+ if (is_written && !is_read) {
+ return "writeonly ";
+ }
+ if (is_read && !is_written) {
+ return "readonly ";
+ }
+ return "";
+}
+
std::string_view GetTessMode(TessPrimitive primitive) {
switch (primitive) {
case TessPrimitive::Triangles:
@@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
const RuntimeInfo& runtime_info_)
: info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
- header += "#pragma optionNV(fastmath off)\n";
+ if (profile.need_fastmath_off) {
+ header += "#pragma optionNV(fastmath off)\n";
+ }
SetupExtensions();
stage = program.stage;
switch (program.stage) {
@@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
}
for (size_t index = 0; index < info.stores_generics.size(); ++index) {
// TODO: Properly resolve attribute issues
- if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) {
+ if (info.stores_generics[index] || StageInitializesVaryings()) {
DefineGenericOutput(index, program.invocations);
}
}
@@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
DefineConstants();
}
+bool EmitContext::StageInitializesVaryings() const noexcept {
+ switch (stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ case Stage::Geometry:
+ return true;
+ default:
+ return false;
+ }
+}
+
void EmitContext::SetupExtensions() {
if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
header += "#extension GL_EXT_texture_shadow_lod : enable\n";
@@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() {
header += "#extension GL_NV_shader_atomic_float : enable\n";
}
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
- header += "#extension NV_shader_atomic_fp16_vector : enable\n";
+ header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
}
if (info.uses_fp16) {
if (profile.support_gl_nv_gpu_shader_5) {
@@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() {
if (info.stores_viewport_mask && profile.support_viewport_mask) {
header += "#extension GL_NV_viewport_array2 : enable\n";
}
- if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) {
+ if (info.uses_typeless_image_reads) {
header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
}
if (info.uses_derivatives && profile.support_gl_derivative_control) {
@@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
"return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
"uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
}
- write_func += "}";
- write_func_64 += "}";
- write_func_128 += "}";
+ write_func += '}';
+ write_func_64 += '}';
+ write_func_128 += '}';
load_func += "return 0u;}";
load_func_64 += "return uvec2(0);}";
load_func_128 += "return uvec4(0);}";
@@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) {
for (const auto& desc : info.image_buffer_descriptors) {
image_buffers.push_back({bindings.image, desc.count});
const auto format{ImageFormatString(desc.format)};
+ const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
- header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image,
- format, bindings.image, array_decorator);
+ header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
+ bindings.image, format, qualifier, bindings.image, array_decorator);
bindings.image += desc.count;
}
images.reserve(info.image_descriptors.size());
@@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) {
images.push_back({bindings.image, desc.count});
const auto format{ImageFormatString(desc.format)};
const auto image_type{ImageType(desc.type)};
- const auto qualifier{desc.is_written ? "" : "readonly "};
+ const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
qualifier, image_type, bindings.image, array_decorator);
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h
index 8fa87c02c..4a50556e1 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.h
+++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -136,6 +136,8 @@ public:
code += '\n';
}
+ [[nodiscard]] bool StageInitializesVaryings() const noexcept;
+
std::string header;
std::string code;
VarAlloc var_alloc;
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index edeecc26e..a241d18fe 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
break;
case IR::Attribute::FogCoordinate:
- ctx.Add("gl_FogFragCoord.x={};", value);
+ ctx.Add("gl_FogFragCoord={};", value);
break;
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
index cfef58d79..59ca52f07 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -10,6 +10,17 @@
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLSL {
+namespace {
+void InitializeVaryings(EmitContext& ctx) {
+ ctx.Add("gl_Position=vec4(0,0,0,1);");
+ // TODO: Properly resolve attribute issues
+ for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
+ if (!ctx.info.stores_generics[index]) {
+ ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
+ }
+ }
+}
+} // Anonymous namespace
void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
const size_t num_args{phi.NumArgs()};
@@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
}
void EmitPrologue(EmitContext& ctx) {
- if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) {
- ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);");
- // TODO: Properly resolve attribute issues
- for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
- if (!ctx.info.stores_generics[index]) {
- ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
- }
- }
+ if (ctx.StageInitializesVaryings()) {
+ InitializeVaryings(ctx);
}
}
@@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {}
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
+ InitializeVaryings(ctx);
}
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index e9098239d..737f186ab 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -312,11 +312,14 @@ public:
}
u32 Add(const ImageBufferDescriptor& desc) {
- return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
+ const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
desc.size_shift == existing.size_shift;
- });
+ })};
+ image_buffer_descriptors[index].is_written |= desc.is_written;
+ image_buffer_descriptors[index].is_read |= desc.is_read;
+ return index;
}
u32 Add(const TextureDescriptor& desc) {
@@ -339,6 +342,7 @@ public:
desc.size_shift == existing.size_shift;
})};
image_descriptors[index].is_written |= desc.is_written;
+ image_descriptors[index].is_read |= desc.is_read;
return index;
}
@@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) {
throw NotImplementedException("Unexpected separate sampler");
}
const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
+ const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead};
if (flags.type == TextureType::Buffer) {
index = descriptors.Add(ImageBufferDescriptor{
.format = flags.image_format,
.is_written = is_written,
+ .is_read = is_read,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
.count = cbuf.count,
@@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) {
.type = flags.type,
.format = flags.image_format,
.is_written = is_written,
+ .is_read = is_read,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
.count = cbuf.count,
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index e8cfc03af..a3c412a0f 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -97,6 +97,8 @@ struct Profile {
/// Fragment outputs have to be declared even if they are not written to avoid undefined values.
/// See Ori and the Blind Forest's main menu for reference.
bool need_declared_frag_colors{};
+ /// Prevents fast math optimizations that may cause inaccuracies
+ bool need_fastmath_off{};
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 74d7a6a94..e9ebc16a4 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDes
struct ImageBufferDescriptor {
ImageFormat format;
bool is_written;
+ bool is_read;
u32 cbuf_index;
u32 cbuf_offset;
u32 count;
@@ -99,6 +100,7 @@ struct ImageDescriptor {
TextureType type;
ImageFormat format;
bool is_written;
+ bool is_read;
u32 cbuf_index;
u32 cbuf_offset;
u32 count;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index bf08a6d93..5838fc02f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -162,6 +162,7 @@ Device::Device() {
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
+ need_fastmath_off = is_nvidia;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 0b59c9df0..0c9d6fe31 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -136,6 +136,10 @@ public:
return warp_size_potentially_larger_than_guest;
}
+ bool NeedsFastmathOff() const {
+ return need_fastmath_off;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -171,6 +175,7 @@ private:
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{};
+ bool need_fastmath_off{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index d27a3cf46..8d11fbc55 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
std::ranges::transform(infos, stage_infos.begin(),
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable {
- if (device.UseAssemblyShaders()) {
- for (size_t stage = 0; stage < 5; ++stage) {
- const auto code{sources[stage]};
- if (code.empty()) {
- continue;
- }
+ if (!device.UseAssemblyShaders()) {
+ program.handle = glCreateProgram();
+ }
+ for (size_t stage = 0; stage < 5; ++stage) {
+ const auto code{sources[stage]};
+ if (code.empty()) {
+ continue;
+ }
+ if (device.UseAssemblyShaders()) {
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage;
- }
- } else {
- program.handle = glCreateProgram();
- for (size_t stage = 0; stage < 5; ++stage) {
- const auto code{sources[stage]};
- if (code.empty()) {
- continue;
- }
+ } else {
AttachShader(Stage(stage), program.handle, code);
}
- LinkProgram(program.handle);
}
- if (shader_notify) {
- shader_notify->MarkShaderComplete();
+ if (!device.UseAssemblyShaders()) {
+ LinkProgram(program.handle);
}
u32 num_textures{};
u32 num_images{};
@@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
if (assembly_shaders && xfb_state) {
GenerateTransformFeedbackState(*xfb_state);
}
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
is_built.store(true, std::memory_order_relaxed);
}};
if (thread_worker) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index fedbce2f0..620666622 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.lower_left_origin_mode = true,
.need_declared_frag_colors = true,
+ .need_fastmath_off = device.NeedsFastmathOff(),
.has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true,