diff options
Diffstat (limited to 'src/video_core')
25 files changed, 1034 insertions, 761 deletions
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 08586d33c..63d449135 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <bitset> #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -49,6 +50,33 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { } } +Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { + const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); + ASSERT(cbuf_mask[regs.tex_cb_index]); + + const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; + ASSERT(texinfo.Address() != 0); + + const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); + ASSERT(address < texinfo.Address() + texinfo.size); + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; + return GetTextureInfo(tex_handle, offset); +} + +Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const { + return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id), + GetTSCEntry(tex_handle.tsc_id)}; +} + +u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { + const auto& buffer = launch_description.const_buffer_config[const_buffer]; + u32 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); + return result; +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, @@ -60,4 +88,29 @@ void KeplerCompute::ProcessLaunch() { rasterizer.DispatchCompute(code_addr); } +Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { + const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; + + Texture::TICEntry tic_entry; + memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); + + const auto r_type{tic_entry.r_type.Value()}; + const auto g_type{tic_entry.g_type.Value()}; + const auto b_type{tic_entry.b_type.Value()}; + const auto a_type{tic_entry.a_type.Value()}; + + // TODO(Subv): Different data types for separate components are not supported + DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); + + return tic_entry; +} + +Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const { + const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; + + Texture::TSCEntry tsc_entry; + memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); + return tsc_entry; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 6a3309a2c..90cf650d2 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,6 +12,7 @@ #include "common/common_types.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" +#include "video_core/textures/texture.h" namespace Core { class System; @@ -111,7 +112,7 @@ public: INSERT_PADDING_WORDS(0x3FE); - u32 texture_const_buffer_index; + u32 tex_cb_index; INSERT_PADDING_WORDS(0x374); }; @@ -149,7 +150,7 @@ public: union { BitField<0, 8, u32> const_buffer_enable_mask; BitField<29, 2, u32> cache_layout; - } memory_config; + }; INSERT_PADDING_WORDS(0x8); @@ -194,6 +195,14 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const; + + /// Given a Texture Handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const; + + u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; @@ -201,6 +210,12 @@ private: Upload::State upload_state; void ProcessLaunch(); + + /// Retrieves information about a specific TIC entry from the TIC buffer. + Texture::TICEntry GetTICEntry(u32 tic_index) const; + + /// Retrieves information about a specific TSC entry from the TSC buffer. + Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -218,12 +233,12 @@ ASSERT_REG_POSITION(launch, 0xAF); ASSERT_REG_POSITION(tsc, 0x557); ASSERT_REG_POSITION(tic, 0x55D); ASSERT_REG_POSITION(code_loc, 0x582); -ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); +ASSERT_REG_POSITION(tex_cb_index, 0x982); ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); -ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); +ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14); ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 9e7b50327..f67a5389f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -62,6 +62,7 @@ public: static constexpr std::size_t NumVertexAttributes = 32; static constexpr std::size_t NumVaryings = 31; static constexpr std::size_t NumTextureSamplers = 32; + static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t MaxShaderProgram = 6; static constexpr std::size_t MaxShaderStage = 5; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 03d434b28..4f59a87b4 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -14,12 +14,22 @@ namespace OpenGL { namespace { + template <typename T> T GetInteger(GLenum pname) { GLint temporary; glGetIntegerv(pname, &temporary); return static_cast<T>(temporary); } + +bool TestProgram(const GLchar* glsl) { + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)}; + GLint link_status; + glGetProgramiv(shader, GL_LINK_STATUS, &link_status); + glDeleteProgram(shader); + return link_status == GL_TRUE; +} + } // Anonymous namespace Device::Device() { @@ -32,6 +42,11 @@ Device::Device() { has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); + has_precise_bug = TestPreciseBug(); + + LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); + LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); + LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); } Device::Device(std::nullptr_t) { @@ -42,30 +57,21 @@ Device::Device(std::nullptr_t) { has_vertex_viewport_layer = true; has_variable_aoffi = true; has_component_indexing_bug = false; + has_precise_bug = false; } bool Device::TestVariableAoffi() { - const GLchar* AOFFI_TEST = R"(#version 430 core + return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. uniform sampler2D tex; uniform ivec2 variable_offset; out vec4 output_attribute; void main() { output_attribute = textureOffset(tex, vec2(0), variable_offset); -} -)"; - const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)}; - GLint link_status{}; - glGetProgramiv(shader, GL_LINK_STATUS, &link_status); - glDeleteProgram(shader); - - const bool supported{link_status == GL_TRUE}; - LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported); - return supported; +})"); } bool Device::TestComponentIndexingBug() { - constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; const GLchar* COMPONENT_TEST = R"(#version 430 core layout (std430, binding = 0) buffer OutputBuffer { uint output_value; @@ -105,12 +111,21 @@ void main() { GLuint result; glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); if (result != values.at(index)) { - LOG_INFO(Render_OpenGL, log_message, true); return true; } } - LOG_INFO(Render_OpenGL, log_message, false); return false; } +bool Device::TestPreciseBug() { + return !TestProgram(R"(#version 430 core +in vec3 coords; +out float out_value; +uniform sampler2DShadow tex; +void main() { + precise float tmp_value = vec4(texture(tex, coords)).x; + out_value = tmp_value; +})"); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 3ef7c6dd8..ba6dcd3be 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -46,9 +46,14 @@ public: return has_component_indexing_bug; } + bool HasPreciseBug() const { + return has_precise_bug; + } + private: static bool TestVariableAoffi(); static bool TestComponentIndexingBug(); + static bool TestPreciseBug(); std::size_t uniform_buffer_alignment{}; std::size_t shader_storage_alignment{}; @@ -58,6 +63,7 @@ private: bool has_vertex_viewport_layer{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; + bool has_precise_bug{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bb09ecd52..4e266cdad 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -331,7 +331,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); SetupDrawConstBuffers(stage_enum, shader); SetupDrawGlobalMemory(stage_enum, shader); - const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; + const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)}; const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); @@ -537,8 +537,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( texture_cache.MarkDepthBufferInUse(); fbkey.zeta = depth_surface; - fbkey.stencil_enable = regs.stencil_enable && - depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; + fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } texture_cache.GuardRenderTargets(false); @@ -577,16 +576,15 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo if (depth_surface) { const auto& params = depth_surface->GetSurfaceParams(); switch (params.type) { - case VideoCore::Surface::SurfaceType::Depth: { + case VideoCore::Surface::SurfaceType::Depth: depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); break; - } - case VideoCore::Surface::SurfaceType::DepthStencil: { - depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); + case VideoCore::Surface::SurfaceType::DepthStencil: + depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); break; - } - default: { UNIMPLEMENTED(); } + default: + UNIMPLEMENTED(); } } else { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -639,6 +637,7 @@ void RasterizerOpenGL::Clear() { ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); use_stencil = true; clear_state.stencil.test_enabled = true; + if (regs.clear_flags.stencil) { // Stencil affects the clear so fill it with the used masks clear_state.stencil.front.test_func = GL_ALWAYS; @@ -802,7 +801,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } auto kernel = shader_cache.GetComputeKernel(code_addr); - const auto [program, next_bindings] = kernel->GetProgramHandle({}); + ProgramVariant variant; + variant.texture_buffer_usage = SetupComputeTextures(kernel); + SetupComputeImages(kernel); + + const auto [program, next_bindings] = kernel->GetProgramHandle(variant); state.draw.shader_program = program; state.draw.program_pipeline = 0; @@ -817,13 +820,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); - // TODO(Rodrigo): Bind images and samplers - buffer_cache.Unmap(); bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); + state.ApplyTextures(); + state.ApplyImages(); state.ApplyShaderProgram(); state.ApplyProgramPipeline(); @@ -923,7 +926,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; for (const auto& entry : kernel->GetShaderEntries().const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; - const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); + const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); Tegra::Engines::ConstBufferInfo buffer; buffer.address = config.Address(); buffer.size = config.size; @@ -982,53 +985,125 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); } -TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, - BaseBindings base_bindings) { +TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, + const Shader& shader, + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& entries = shader->GetShaderEntries().samplers; - ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), + ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); TextureBufferUsage texture_buffer_usage{0}; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - Tegra::Texture::FullTextureInfo texture; - if (entry.IsBindless()) { + const auto texture = [&]() { + if (!entry.IsBindless()) { + return maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); - texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); - } else { - texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + }(); + + if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { + texture_buffer_usage.set(bindpoint); } - const u32 current_bindpoint = base_bindings.sampler + bindpoint; + } - auto& unit{state.texture_units[current_bindpoint]}; - unit.sampler = sampler_cache.GetSampler(texture.tsc); + return texture_buffer_usage; +} - if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { - if (view->GetSurfaceParams().IsBuffer()) { - // Record that this texture is a texture buffer. - texture_buffer_usage.set(bindpoint); - } else { - // Apply swizzle to textures that are not buffers. - view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, - texture.tic.w_source); +TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { + MICROPROFILE_SCOPE(OpenGL_Texture); + const auto& compute = system.GPU().KeplerCompute(); + const auto& entries = kernel->GetShaderEntries().samplers; + + ASSERT_MSG(entries.size() <= std::size(state.textures), + "Exceeded the number of active textures."); + + TextureBufferUsage texture_buffer_usage{0}; + + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto texture = [&]() { + if (!entry.IsBindless()) { + return compute.GetTexture(entry.GetOffset()); } - state.texture_units[current_bindpoint].texture = view->GetTexture(); - } else { - // Can occur when texture addr is null or its memory is unmapped/invalid - unit.texture = 0; + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + return compute.GetTextureInfo(tex_handle, entry.GetOffset()); + }(); + + if (SetupTexture(bindpoint, texture, entry)) { + texture_buffer_usage.set(bindpoint); } } return texture_buffer_usage; } +bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, + const GLShader::SamplerEntry& entry) { + state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); + + const auto view = texture_cache.GetTextureSurface(texture.tic, entry); + if (!view) { + // Can occur when texture addr is null or its memory is unmapped/invalid + state.textures[binding] = 0; + return false; + } + state.textures[binding] = view->GetTexture(); + + if (view->GetSurfaceParams().IsBuffer()) { + return true; + } + + // Apply swizzle to textures that are not buffers. + view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + return false; +} + +void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { + const auto& compute = system.GPU().KeplerCompute(); + const auto& entries = shader->GetShaderEntries().images; + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto tic = [&]() { + if (!entry.IsBindless()) { + return compute.GetTexture(entry.GetOffset()).tic; + } + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; + }(); + SetupImage(bindpoint, tic, entry); + } +} + +void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, + const GLShader::ImageEntry& entry) { + const auto view = texture_cache.GetImageSurface(tic, entry); + if (!view) { + state.images[binding] = 0; + return; + } + if (!tic.IsBuffer()) { + view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + } + if (entry.IsWritten()) { + view->MarkAsModified(texture_cache.Tick()); + } + state.images[binding] = view->GetTexture(); +} + void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { const auto& regs = system.GPU().Maxwell3D().regs; const bool geometry_shaders_enabled = @@ -1119,9 +1194,12 @@ void RasterizerOpenGL::SyncStencilTestState() { if (!maxwell3d.dirty.stencil_test) { return; } - const auto& regs = maxwell3d.regs; + maxwell3d.dirty.stencil_test = false; + const auto& regs = maxwell3d.regs; state.stencil.test_enabled = regs.stencil_enable != 0; + state.MarkDirtyStencilState(); + if (!regs.stencil_enable) { return; } @@ -1150,8 +1228,6 @@ void RasterizerOpenGL::SyncStencilTestState() { state.stencil.back.action_depth_fail = GL_KEEP; state.stencil.back.action_depth_pass = GL_KEEP; } - state.MarkDirtyStencilState(); - maxwell3d.dirty.stencil_test = false; } void RasterizerOpenGL::SyncColorMask() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9d20a4fbf..eada752e0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -32,6 +32,7 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/textures/texture.h" namespace Core { class System; @@ -137,8 +138,22 @@ private: /// Configures the current textures to use for the draw command. Returns shaders texture buffer /// usage. - TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, BaseBindings base_bindings); + TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader, BaseBindings base_bindings); + + /// Configures the textures used in a compute shader. Returns texture buffer usage. + TextureBufferUsage SetupComputeTextures(const Shader& kernel); + + /// Configures a texture. Returns true when the texture is a texture buffer. + bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, + const GLShader::SamplerEntry& entry); + + /// Configures images in a compute shader. + void SetupComputeImages(const Shader& shader); + + /// Configures an image. + void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, + const GLShader::ImageEntry& entry); /// Syncs the viewport and depth range to match the guest state void SyncViewport(OpenGLState& current_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cf6a5cddf..909ccb82c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -348,23 +348,16 @@ Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, } std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { - GLuint handle{}; - if (program_type == ProgramType::Geometry) { - handle = GetGeometryShader(variant); - } else { - const auto [entry, is_cache_miss] = programs.try_emplace(variant); - auto& program = entry->second; - if (is_cache_miss) { - program = TryLoadProgram(variant); - if (!program) { - program = SpecializeShader(code, entries, program_type, variant); - disk_cache.SaveUsage(GetUsage(variant)); - } - - LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); + const auto [entry, is_cache_miss] = programs.try_emplace(variant); + auto& program = entry->second; + if (is_cache_miss) { + program = TryLoadProgram(variant); + if (!program) { + program = SpecializeShader(code, entries, program_type, variant); + disk_cache.SaveUsage(GetUsage(variant)); } - handle = program->handle; + LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } auto base_bindings = variant.base_bindings; @@ -375,52 +368,9 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); base_bindings.sampler += static_cast<u32>(entries.samplers.size()); - return {handle, base_bindings}; + return {program->handle, base_bindings}; } -GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) { - const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant); - auto& programs = entry->second; - - switch (variant.primitive_mode) { - case GL_POINTS: - return LazyGeometryProgram(programs.points, variant); - case GL_LINES: - case GL_LINE_STRIP: - return LazyGeometryProgram(programs.lines, variant); - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: - return LazyGeometryProgram(programs.lines_adjacency, variant); - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_TRIANGLE_FAN: - return LazyGeometryProgram(programs.triangles, variant); - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: - return LazyGeometryProgram(programs.triangles_adjacency, variant); - default: - UNREACHABLE_MSG("Unknown primitive mode."); - return LazyGeometryProgram(programs.points, variant); - } -} - -GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, - const ProgramVariant& variant) { - if (target_program) { - return target_program->handle; - } - const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode); - target_program = TryLoadProgram(variant); - if (!target_program) { - target_program = SpecializeShader(code, entries, program_type, variant); - disk_cache.SaveUsage(GetUsage(variant)); - } - - LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); - - return target_program->handle; -}; - CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { const auto found = precompiled_programs.find(GetUsage(variant)); if (found == precompiled_programs.end()) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 2c8faf855..de195cc5d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -86,22 +86,6 @@ private: explicit CachedShader(const ShaderParameters& params, ProgramType program_type, GLShader::ProgramResult result); - // Geometry programs. These are needed because GLSL needs an input topology but it's not - // declared by the hardware. Workaround this issue by generating a different shader per input - // topology class. - struct GeometryPrograms { - CachedProgram points; - CachedProgram lines; - CachedProgram lines_adjacency; - CachedProgram triangles; - CachedProgram triangles_adjacency; - }; - - GLuint GetGeometryShader(const ProgramVariant& variant); - - /// Generates a geometry shader or returns one that already exists. - GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant); - CachedProgram TryLoadProgram(const ProgramVariant& variant) const; ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; @@ -117,11 +101,6 @@ private: std::size_t shader_length{}; std::unordered_map<ProgramVariant, CachedProgram> programs; - std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs; - - std::unordered_map<u32, GLuint> cbuf_resource_cache; - std::unordered_map<u32, GLuint> gmem_resource_cache; - std::unordered_map<u32, GLint> uniform_cache; }; class ShaderCacheOpenGL final : public RasterizerCache<Shader> { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 359d58cbe..6edb2ca38 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -39,7 +39,7 @@ using namespace VideoCommon::Shader; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Operation = const OperationNode&; -enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; +enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; struct TextureAoffi {}; using TextureArgument = std::pair<Type, Node>; @@ -48,7 +48,7 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); -class ShaderWriter { +class ShaderWriter final { public: void AddExpression(std::string_view text) { DEBUG_ASSERT(scope >= 0); @@ -93,9 +93,157 @@ private: u32 temporary_index = 1; }; +class Expression final { +public: + Expression(std::string code, Type type) : code{std::move(code)}, type{type} { + ASSERT(type != Type::Void); + } + Expression() : type{Type::Void} {} + + Type GetType() const { + return type; + } + + std::string GetCode() const { + return code; + } + + void CheckVoid() const { + ASSERT(type == Type::Void); + } + + std::string As(Type type) const { + switch (type) { + case Type::Bool: + return AsBool(); + case Type::Bool2: + return AsBool2(); + case Type::Float: + return AsFloat(); + case Type::Int: + return AsInt(); + case Type::Uint: + return AsUint(); + case Type::HalfFloat: + return AsHalfFloat(); + default: + UNREACHABLE_MSG("Invalid type"); + return code; + } + } + + std::string AsBool() const { + switch (type) { + case Type::Bool: + return code; + default: + UNREACHABLE_MSG("Incompatible types"); + return code; + } + } + + std::string AsBool2() const { + switch (type) { + case Type::Bool2: + return code; + default: + UNREACHABLE_MSG("Incompatible types"); + return code; + } + } + + std::string AsFloat() const { + switch (type) { + case Type::Float: + return code; + case Type::Uint: + return fmt::format("utof({})", code); + case Type::Int: + return fmt::format("itof({})", code); + case Type::HalfFloat: + return fmt::format("utof(packHalf2x16({}))", code); + default: + UNREACHABLE_MSG("Incompatible types"); + return code; + } + } + + std::string AsInt() const { + switch (type) { + case Type::Float: + return fmt::format("ftoi({})", code); + case Type::Uint: + return fmt::format("int({})", code); + case Type::Int: + return code; + case Type::HalfFloat: + return fmt::format("int(packHalf2x16({}))", code); + default: + UNREACHABLE_MSG("Incompatible types"); + return code; + } + } + + std::string AsUint() const { + switch (type) { + case Type::Float: + return fmt::format("ftou({})", code); + case Type::Uint: + return code; + case Type::Int: + return fmt::format("uint({})", code); + case Type::HalfFloat: + return fmt::format("packHalf2x16({})", code); + default: + UNREACHABLE_MSG("Incompatible types"); + return code; + } + } + + std::string AsHalfFloat() const { + switch (type) { + case Type::Float: + return fmt::format("unpackHalf2x16(ftou({}))", code); + case Type::Uint: + return fmt::format("unpackHalf2x16({})", code); + case Type::Int: + return fmt::format("unpackHalf2x16(int({}))", code); + case Type::HalfFloat: + return code; + default: + UNREACHABLE_MSG("Incompatible types"); + return code; + } + } + +private: + std::string code; + Type type{}; +}; + +constexpr const char* GetTypeString(Type type) { + switch (type) { + case Type::Bool: + return "bool"; + case Type::Bool2: + return "bvec2"; + case Type::Float: + return "float"; + case Type::Int: + return "int"; + case Type::Uint: + return "uint"; + case Type::HalfFloat: + return "vec2"; + default: + UNREACHABLE_MSG("Invalid type"); + return "<invalid type>"; + } +} + /// Generates code to use for a swizzle operation. constexpr const char* GetSwizzle(u32 element) { - constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"}; + constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; return swizzle.at(element); } @@ -134,8 +282,8 @@ constexpr bool IsGenericAttribute(Attribute::Index index) { return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; } -constexpr Attribute::Index ToGenericAttribute(u32 value) { - return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0)); +constexpr Attribute::Index ToGenericAttribute(u64 value) { + return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); } u32 GetGenericAttributeIndex(Attribute::Index index) { @@ -191,7 +339,7 @@ public: // VM's program counter const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = {}u;", first_address); + code.AddLine("uint jmp_to = {}U;", first_address); // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. @@ -199,7 +347,7 @@ public: constexpr u32 FLOW_STACK_SIZE = 20; for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); + code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); } } @@ -210,7 +358,7 @@ public: for (const auto& pair : ir.GetBasicBlocks()) { const auto [address, bb] = pair; - code.AddLine("case 0x{:x}u: {{", address); + code.AddLine("case 0x{:X}U: {{", address); ++code.scope; VisitBlock(bb); @@ -241,11 +389,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& image : ir.GetImages()) { + for (const auto& [offset, image] : ir.GetImages()) { entries.images.emplace_back(image); } - for (const auto& gmem_pair : ir.GetGlobalMemory()) { - const auto& [base, usage] = gmem_pair; + for (const auto& [base, usage] : ir.GetGlobalMemory()) { entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, usage.is_written); } @@ -322,7 +469,7 @@ private: void DeclareRegisters() { const auto& registers = ir.GetRegisters(); for (const u32 gpr : registers) { - code.AddLine("float {} = 0;", GetRegister(gpr)); + code.AddLine("float {} = 0.0f;", GetRegister(gpr)); } if (!registers.empty()) { code.AddNewLine(); @@ -348,7 +495,7 @@ private: return; } const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("float {}[{}];", GetLocalMemory(), element_count); + code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); code.AddNewLine(); } @@ -371,8 +518,6 @@ private: return "noperspective "; default: case AttributeUse::Unused: - UNREACHABLE_MSG("Unused attribute being fetched"); - return {}; UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); return {}; } @@ -449,7 +594,7 @@ private: const auto [index, size] = entry; code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, GetConstBufferBlock(index)); - code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index)); + code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); code.AddLine("}};"); code.AddNewLine(); } @@ -470,7 +615,7 @@ private: code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); - code.AddLine(" float {}[];", GetGlobalMemory(base)); + code.AddLine(" uint {}[];", GetGlobalMemory(base)); code.AddLine("}};"); code.AddNewLine(); } @@ -528,7 +673,7 @@ private: if (!ir.HasPhysicalAttributes()) { return; } - code.AddLine("float readPhysicalAttribute(uint physical_address) {{"); + code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); ++code.scope; code.AddLine("switch (physical_address) {{"); @@ -537,15 +682,16 @@ private: for (u32 index = 0; index < num_attributes; ++index) { const auto attribute{ToGenericAttribute(index)}; for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base{0x80}; - constexpr u32 generic_stride{16}; - constexpr u32 element_stride{4}; + constexpr u32 generic_base = 0x80; + constexpr u32 generic_stride = 16; + constexpr u32 element_stride = 4; const u32 address{generic_base + index * generic_stride + element * element_stride}; - const bool declared{stage != ProgramType::Fragment || - header.ps.GetAttributeUse(index) != AttributeUse::Unused}; - const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; - code.AddLine("case 0x{:x}: return {};", address, value); + const bool declared = stage != ProgramType::Fragment || + header.ps.GetAttributeUse(index) != AttributeUse::Unused; + const std::string value = + declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; + code.AddLine("case 0x{:X}U: return {};", address, value); } } @@ -559,7 +705,7 @@ private: void DeclareImages() { const auto& images{ir.GetImages()}; - for (const auto& image : images) { + for (const auto& [offset, image] : images) { const std::string image_type = [&]() { switch (image.GetType()) { case Tegra::Shader::ImageType::Texture1D: @@ -579,9 +725,16 @@ private: return "image1D"; } }(); - code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform " + std::string qualifier = "coherent volatile"; + if (image.IsRead() && !image.IsWritten()) { + qualifier += " readonly"; + } else if (image.IsWritten() && !image.IsRead()) { + qualifier += " writeonly"; + } + + code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform " "{} {};", - image.GetIndex(), image_type, GetImage(image)); + image.GetIndex(), qualifier, image_type, GetImage(image)); } if (!images.empty()) { code.AddNewLine(); @@ -590,13 +743,11 @@ private: void VisitBlock(const NodeBlock& bb) { for (const auto& node : bb) { - if (const std::string expr = Visit(node); !expr.empty()) { - code.AddLine(expr); - } + Visit(node).CheckVoid(); } } - std::string Visit(const Node& node) { + Expression Visit(const Node& node) { if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); if (operation_index >= operation_decompilers.size()) { @@ -614,18 +765,18 @@ private: if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { - return "0"; + return {"0U", Type::Uint}; } - return GetRegister(index); + return {GetRegister(index), Type::Float}; } if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { // For eyecandy avoid using hex numbers on single digits - return fmt::format("utof({}u)", immediate->GetValue()); + return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; } - return fmt::format("utof(0x{:x}u)", immediate->GetValue()); + return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; } if (const auto predicate = std::get_if<PredicateNode>(&*node)) { @@ -640,17 +791,18 @@ private: } }(); if (predicate->IsNegated()) { - return fmt::format("!({})", value); + return {fmt::format("!({})", value), Type::Bool}; } - return value; + return {value, Type::Bool}; } if (const auto abuf = std::get_if<AbufNode>(&*node)) { UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, "Physical attributes in geometry shaders are not implemented"); if (abuf->IsPhysicalBuffer()) { - return fmt::format("readPhysicalAttribute(ftou({}))", - Visit(abuf->GetPhysicalAddress())); + return {fmt::format("ReadPhysicalAttribute({})", + Visit(abuf->GetPhysicalAddress()).AsUint()), + Type::Float}; } return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); } @@ -661,59 +813,64 @@ private: // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4); + return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), + offset_imm / (4 * 4), (offset_imm / 4) % 4), + Type::Uint}; } if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); + code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); if (!device.HasComponentIndexingBug()) { - return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset); + return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset), + Type::Uint}; } // AMD's proprietary GLSL compiler emits ill code for variable component access. // To bypass this driver bug generate 4 ifs, one per each component. const std::string pack = code.GenerateTemporary(); - code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), final_offset); const std::string result = code.GenerateTemporary(); - code.AddLine("float {};", result); + code.AddLine("uint {};", result); for (u32 swizzle = 0; swizzle < 4; ++swizzle) { code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, GetSwizzle(swizzle)); } - return result; + return {result, Type::Uint}; } UNREACHABLE_MSG("Unmanaged offset node type"); } if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()); - const std::string base = Visit(gmem->GetBaseAddress()); - const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); - return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + const std::string real = Visit(gmem->GetRealAddress()).AsUint(); + const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); + const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); + return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), + Type::Uint}; } if (const auto lmem = std::get_if<LmemNode>(&*node)) { if (stage == ProgramType::Compute) { LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); } - return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); + return { + fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), + Type::Uint}; } if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - return GetInternalFlag(internal_flag->GetFlag()); + return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; } if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); + code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); ++code.scope; VisitBlock(conditional->GetCode()); @@ -724,20 +881,21 @@ private: } if (const auto comment = std::get_if<CommentNode>(&*node)) { - return "// " + comment->GetText(); + code.AddLine("// " + comment->GetText()); + return {}; } UNREACHABLE(); return {}; } - std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { + Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { const auto GeometryPass = [&](std::string_view name) { if (stage == ProgramType::Geometry && buffer) { // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games // set an 0x80000000 index for those and the shader fails to build. Find out why // this happens and what's its intent. - return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer)); + return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); } return std::string(name); }; @@ -746,25 +904,27 @@ private: case Attribute::Index::Position: switch (stage) { case ProgramType::Geometry: - return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), - GetSwizzle(element)); + return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), + GetSwizzle(element)), + Type::Float}; case ProgramType::Fragment: - return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); + return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), + Type::Float}; default: UNREACHABLE(); } case Attribute::Index::PointCoord: switch (element) { case 0: - return "gl_PointCoord.x"; + return {"gl_PointCoord.x", Type::Float}; case 1: - return "gl_PointCoord.y"; + return {"gl_PointCoord.y", Type::Float}; case 2: case 3: - return "0"; + return {"0.0f", Type::Float}; } UNREACHABLE(); - return "0"; + return {"0", Type::Int}; case Attribute::Index::TessCoordInstanceIDVertexID: // TODO(Subv): Find out what the values are for the first two elements when inside a // vertex shader, and what's the value of the fourth element when inside a Tess Eval @@ -773,44 +933,49 @@ private: switch (element) { case 2: // Config pack's first value is instance_id. - return "uintBitsToFloat(config_pack[0])"; + return {"config_pack[0]", Type::Uint}; case 3: - return "uintBitsToFloat(gl_VertexID)"; + return {"gl_VertexID", Type::Int}; } UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return "0"; + return {"0", Type::Int}; case Attribute::Index::FrontFacing: // TODO(Subv): Find out what the values are for the other elements. ASSERT(stage == ProgramType::Fragment); switch (element) { case 3: - return "itof(gl_FrontFacing ? -1 : 0)"; + return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; } UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return "0"; + return {"0", Type::Int}; default: if (IsGenericAttribute(attribute)) { - return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); + return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), + Type::Float}; } break; } UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); - return "0"; + return {"0", Type::Int}; } - std::string ApplyPrecise(Operation operation, const std::string& value) { + Expression ApplyPrecise(Operation operation, std::string value, Type type) { if (!IsPrecise(operation)) { - return value; + return {std::move(value), type}; } - // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders - const std::string precise = stage != ProgramType::Fragment ? "precise " : ""; + // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to + // be found in fragment shaders, so we disable precise there. There are vertex shaders that + // also fail to build but nobody seems to care about those. + // Note: Only bugged drivers will skip precise. + const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment; - const std::string temporary = code.GenerateTemporary(); - code.AddLine("{}float {} = {};", precise, temporary, value); - return temporary; + std::string temporary = code.GenerateTemporary(); + code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), + temporary, value); + return {std::move(temporary), type}; } - std::string VisitOperand(Operation operation, std::size_t operand_index) { + Expression VisitOperand(Operation operation, std::size_t operand_index) { const auto& operand = operation[operand_index]; const bool parent_precise = IsPrecise(operation); const bool child_precise = IsPrecise(operand); @@ -819,19 +984,16 @@ private: return Visit(operand); } - const std::string temporary = code.GenerateTemporary(); - code.AddLine("float {} = {};", temporary, Visit(operand)); - return temporary; - } - - std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { - return CastOperand(VisitOperand(operation, operand_index), type); + Expression value = Visit(operand); + std::string temporary = code.GenerateTemporary(); + code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); + return {std::move(temporary), value.GetType()}; } - std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) { + Expression GetOutputAttribute(const AbufNode* abuf) { switch (const auto attribute = abuf->GetIndex()) { case Attribute::Index::Position: - return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false); + return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}; case Attribute::Index::LayerViewportPointSize: switch (abuf->GetElement()) { case 0: @@ -841,119 +1003,79 @@ private: if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { return {}; } - return std::make_pair("gl_Layer", true); + return {"gl_Layer", Type::Int}; case 2: if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { return {}; } - return std::make_pair("gl_ViewportIndex", true); + return {"gl_ViewportIndex", Type::Int}; case 3: UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); - return std::make_pair("gl_PointSize", false); + return {"gl_PointSize", Type::Float}; } return {}; case Attribute::Index::ClipDistances0123: - return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false); + return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}; case Attribute::Index::ClipDistances4567: - return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), - false); + return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}; default: if (IsGenericAttribute(attribute)) { - return std::make_pair( - GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false); + return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), + Type::Float}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); return {}; } } - std::string CastOperand(const std::string& value, Type type) const { - switch (type) { - case Type::Bool: - case Type::Bool2: - case Type::Float: - return value; - case Type::Int: - return fmt::format("ftoi({})", value); - case Type::Uint: - return fmt::format("ftou({})", value); - case Type::HalfFloat: - return fmt::format("toHalf2({})", value); - } - UNREACHABLE(); - return value; + Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, + Type type_a) { + std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); + return ApplyPrecise(operation, std::move(op_str), result_type); } - std::string BitwiseCastResult(const std::string& value, Type type, - bool needs_parenthesis = false) { - switch (type) { - case Type::Bool: - case Type::Bool2: - case Type::Float: - if (needs_parenthesis) { - return fmt::format("({})", value); - } - return value; - case Type::Int: - return fmt::format("itof({})", value); - case Type::Uint: - return fmt::format("utof({})", value); - case Type::HalfFloat: - return fmt::format("fromHalf2({})", value); - } - UNREACHABLE(); - return value; - } - - std::string GenerateUnary(Operation operation, const std::string& func, Type result_type, - Type type_a, bool needs_parenthesis = true) { - const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a)); - - return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis)); - } - - std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0, type_a); - const std::string op_b = VisitOperand(operation, 1, type_b); - const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); + Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, + Type type_a, Type type_b) { + const std::string op_a = VisitOperand(operation, 0).As(type_a); + const std::string op_b = VisitOperand(operation, 1).As(type_b); + std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); + return ApplyPrecise(operation, std::move(op_str), result_type); } - std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0, type_a); - const std::string op_b = VisitOperand(operation, 1, type_b); - const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); + Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, + Type type_a, Type type_b) { + const std::string op_a = VisitOperand(operation, 0).As(type_a); + const std::string op_b = VisitOperand(operation, 1).As(type_b); + std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); + return ApplyPrecise(operation, std::move(op_str), result_type); } - std::string GenerateTernary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0, type_a); - const std::string op_b = VisitOperand(operation, 1, type_b); - const std::string op_c = VisitOperand(operation, 2, type_c); - const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); + Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, + Type type_a, Type type_b, Type type_c) { + const std::string op_a = VisitOperand(operation, 0).As(type_a); + const std::string op_b = VisitOperand(operation, 1).As(type_b); + const std::string op_c = VisitOperand(operation, 2).As(type_c); + std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); + return ApplyPrecise(operation, std::move(op_str), result_type); } - std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0, type_a); - const std::string op_b = VisitOperand(operation, 1, type_b); - const std::string op_c = VisitOperand(operation, 2, type_c); - const std::string op_d = VisitOperand(operation, 3, type_d); - const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); + Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, + Type type_a, Type type_b, Type type_c, Type type_d) { + const std::string op_a = VisitOperand(operation, 0).As(type_a); + const std::string op_b = VisitOperand(operation, 1).As(type_b); + const std::string op_c = VisitOperand(operation, 2).As(type_c); + const std::string op_d = VisitOperand(operation, 3).As(type_d); + std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); + return ApplyPrecise(operation, std::move(op_str), result_type); } std::string GenerateTexture(Operation operation, const std::string& function_suffix, const std::vector<TextureIR>& extras) { - constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; + constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -970,17 +1092,17 @@ private: expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]); + expr += Visit(operation[i]).AsFloat(); const std::size_t next = i + 1; if (next < count) expr += ", "; } if (has_array) { - expr += ", float(ftoi(" + Visit(meta->array) + "))"; + expr += ", float(" + Visit(meta->array).AsInt() + ')'; } if (has_shadow) { - expr += ", " + Visit(meta->depth_compare); + expr += ", " + Visit(meta->depth_compare).AsFloat(); } expr += ')'; @@ -1011,11 +1133,11 @@ private: // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else { - expr += fmt::format("ftoi({})", Visit(operand)); + expr += Visit(operand).AsInt(); } break; case Type::Float: - expr += Visit(operand); + expr += Visit(operand).AsFloat(); break; default: { const auto type_int = static_cast<u32>(type); @@ -1031,7 +1153,7 @@ private: if (aoffi.empty()) { return {}; } - constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; + constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; std::string expr = ", "; expr += coord_constructors.at(aoffi.size() - 1); expr += '('; @@ -1044,7 +1166,7 @@ private: expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else if (device.HasVariableAoffi()) { // Avoid using variable AOFFI on unsupported devices. - expr += fmt::format("ftoi({})", Visit(operand)); + expr += Visit(operand).AsInt(); } else { // Insert 0 on devices not supporting variable AOFFI. expr += '0'; @@ -1058,328 +1180,314 @@ private: return expr; } - std::string Assign(Operation operation) { + Expression Assign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; - std::string target; - bool is_integer = false; - + Expression target; if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } - target = GetRegister(gpr->GetIndex()); + target = {GetRegister(gpr->GetIndex()), Type::Float}; } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - const auto result = GetOutputAttribute(abuf); - if (!result) { - return {}; - } - target = result->first; - is_integer = result->second; + target = GetOutputAttribute(abuf); } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { if (stage == ProgramType::Compute) { LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); } - target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); + target = { + fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), + Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()); - const std::string base = Visit(gmem->GetBaseAddress()); - const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); - target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + const std::string real = Visit(gmem->GetRealAddress()).AsUint(); + const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); + const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); + target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), + Type::Uint}; } else { UNREACHABLE_MSG("Assign called without a proper target"); } - if (is_integer) { - code.AddLine("{} = ftoi({});", target, Visit(src)); - } else { - code.AddLine("{} = {};", target, Visit(src)); - } + code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); return {}; } template <Type type> - std::string Add(Operation operation) { + Expression Add(Operation operation) { return GenerateBinaryInfix(operation, "+", type, type, type); } template <Type type> - std::string Mul(Operation operation) { + Expression Mul(Operation operation) { return GenerateBinaryInfix(operation, "*", type, type, type); } template <Type type> - std::string Div(Operation operation) { + Expression Div(Operation operation) { return GenerateBinaryInfix(operation, "/", type, type, type); } template <Type type> - std::string Fma(Operation operation) { + Expression Fma(Operation operation) { return GenerateTernary(operation, "fma", type, type, type, type); } template <Type type> - std::string Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type, true); + Expression Negate(Operation operation) { + return GenerateUnary(operation, "-", type, type); } template <Type type> - std::string Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type, false); + Expression Absolute(Operation operation) { + return GenerateUnary(operation, "abs", type, type); } - std::string FClamp(Operation operation) { + Expression FClamp(Operation operation) { return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, Type::Float); } - std::string FCastHalf0(Operation operation) { - const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); - return fmt::format("({})[0]", op_a); + Expression FCastHalf0(Operation operation) { + return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; } - std::string FCastHalf1(Operation operation) { - const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); - return fmt::format("({})[1]", op_a); + Expression FCastHalf1(Operation operation) { + return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; } template <Type type> - std::string Min(Operation operation) { + Expression Min(Operation operation) { return GenerateBinaryCall(operation, "min", type, type, type); } template <Type type> - std::string Max(Operation operation) { + Expression Max(Operation operation) { return GenerateBinaryCall(operation, "max", type, type, type); } - std::string Select(Operation operation) { - const std::string condition = Visit(operation[0]); - const std::string true_case = Visit(operation[1]); - const std::string false_case = Visit(operation[2]); - const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); + Expression Select(Operation operation) { + const std::string condition = Visit(operation[0]).AsBool(); + const std::string true_case = Visit(operation[1]).AsUint(); + const std::string false_case = Visit(operation[2]).AsUint(); + std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); - return ApplyPrecise(operation, op_str); + return ApplyPrecise(operation, std::move(op_str), Type::Uint); } - std::string FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float, false); + Expression FCos(Operation operation) { + return GenerateUnary(operation, "cos", Type::Float, Type::Float); } - std::string FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float, false); + Expression FSin(Operation operation) { + return GenerateUnary(operation, "sin", Type::Float, Type::Float); } - std::string FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false); + Expression FExp2(Operation operation) { + return GenerateUnary(operation, "exp2", Type::Float, Type::Float); } - std::string FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float, false); + Expression FLog2(Operation operation) { + return GenerateUnary(operation, "log2", Type::Float, Type::Float); } - std::string FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false); + Expression FInverseSqrt(Operation operation) { + return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); } - std::string FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false); + Expression FSqrt(Operation operation) { + return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); } - std::string FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false); + Expression FRoundEven(Operation operation) { + return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); } - std::string FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float, false); + Expression FFloor(Operation operation) { + return GenerateUnary(operation, "floor", Type::Float, Type::Float); } - std::string FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false); + Expression FCeil(Operation operation) { + return GenerateUnary(operation, "ceil", Type::Float, Type::Float); } - std::string FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false); + Expression FTrunc(Operation operation) { + return GenerateUnary(operation, "trunc", Type::Float, Type::Float); } template <Type type> - std::string FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type, false); + Expression FCastInteger(Operation operation) { + return GenerateUnary(operation, "float", Type::Float, type); } - std::string ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float, false); + Expression ICastFloat(Operation operation) { + return GenerateUnary(operation, "int", Type::Int, Type::Float); } - std::string ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint, false); + Expression ICastUnsigned(Operation operation) { + return GenerateUnary(operation, "int", Type::Int, Type::Uint); } template <Type type> - std::string LogicalShiftLeft(Operation operation) { + Expression LogicalShiftLeft(Operation operation) { return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); } - std::string ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0, Type::Uint); - const std::string op_b = VisitOperand(operation, 1, Type::Uint); - const std::string op_str = fmt::format("int({} >> {})", op_a, op_b); + Expression ILogicalShiftRight(Operation operation) { + const std::string op_a = VisitOperand(operation, 0).AsUint(); + const std::string op_b = VisitOperand(operation, 1).AsUint(); + std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int)); + return ApplyPrecise(operation, std::move(op_str), Type::Int); } - std::string IArithmeticShiftRight(Operation operation) { + Expression IArithmeticShiftRight(Operation operation) { return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); } template <Type type> - std::string BitwiseAnd(Operation operation) { + Expression BitwiseAnd(Operation operation) { return GenerateBinaryInfix(operation, "&", type, type, type); } template <Type type> - std::string BitwiseOr(Operation operation) { + Expression BitwiseOr(Operation operation) { return GenerateBinaryInfix(operation, "|", type, type, type); } template <Type type> - std::string BitwiseXor(Operation operation) { + Expression BitwiseXor(Operation operation) { return GenerateBinaryInfix(operation, "^", type, type, type); } template <Type type> - std::string BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type, false); + Expression BitwiseNot(Operation operation) { + return GenerateUnary(operation, "~", type, type); } - std::string UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false); + Expression UCastFloat(Operation operation) { + return GenerateUnary(operation, "uint", Type::Uint, Type::Float); } - std::string UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false); + Expression UCastSigned(Operation operation) { + return GenerateUnary(operation, "uint", Type::Uint, Type::Int); } - std::string UShiftRight(Operation operation) { + Expression UShiftRight(Operation operation) { return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); } template <Type type> - std::string BitfieldInsert(Operation operation) { + Expression BitfieldInsert(Operation operation) { return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, Type::Int); } template <Type type> - std::string BitfieldExtract(Operation operation) { + Expression BitfieldExtract(Operation operation) { return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); } template <Type type> - std::string BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type, false); + Expression BitCount(Operation operation) { + return GenerateUnary(operation, "bitCount", type, type); } - std::string HNegate(Operation operation) { + Expression HNegate(Operation operation) { const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1"; + return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; }; - const std::string value = - fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat), - GetNegate(1), GetNegate(2)); - return BitwiseCastResult(value, Type::HalfFloat); - } - - std::string HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0, Type::HalfFloat); - const std::string min = VisitOperand(operation, 1, Type::Float); - const std::string max = VisitOperand(operation, 2, Type::Float); - const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); - } - - std::string HCastFloat(Operation operation) { - const std::string op_a = VisitOperand(operation, 0, Type::Float); - return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a); - } - - std::string HUnpack(Operation operation) { - const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; - const auto value = [&]() -> std::string { - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return fmt::format("vec2(fromHalf2({}))", operand); - case Tegra::Shader::HalfType::H0_H0: - return fmt::format("vec2({}[0])", operand); - case Tegra::Shader::HalfType::H1_H1: - return fmt::format("vec2({}[1])", operand); - } - UNREACHABLE(); - return "0"; - }(); - return fmt::format("fromHalf2({})", value); + return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), + GetNegate(1), GetNegate(2)), + Type::HalfFloat}; + } + + Expression HClamp(Operation operation) { + const std::string value = VisitOperand(operation, 0).AsHalfFloat(); + const std::string min = VisitOperand(operation, 1).AsFloat(); + const std::string max = VisitOperand(operation, 2).AsFloat(); + std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); + + return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); + } + + Expression HCastFloat(Operation operation) { + return {fmt::format("vec2({})", VisitOperand(operation, 0).AsFloat()), Type::HalfFloat}; + } + + Expression HUnpack(Operation operation) { + Expression operand = VisitOperand(operation, 0); + switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { + case Tegra::Shader::HalfType::H0_H1: + return operand; + case Tegra::Shader::HalfType::F32: + return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; + case Tegra::Shader::HalfType::H0_H0: + return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; + case Tegra::Shader::HalfType::H1_H1: + return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; + } } - std::string HMergeF32(Operation operation) { - return fmt::format("float(toHalf2({})[0])", Visit(operation[0])); + Expression HMergeF32(Operation operation) { + return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; } - std::string HMergeH0(Operation operation) { - return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]), - Visit(operation[0])); + Expression HMergeH0(Operation operation) { + std::string dest = VisitOperand(operation, 0).AsUint(); + std::string src = VisitOperand(operation, 1).AsUint(); + return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; } - std::string HMergeH1(Operation operation) { - return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]), - Visit(operation[1])); + Expression HMergeH1(Operation operation) { + std::string dest = VisitOperand(operation, 0).AsUint(); + std::string src = VisitOperand(operation, 1).AsUint(); + return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; } - std::string HPack2(Operation operation) { - return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]), - Visit(operation[1])); + Expression HPack2(Operation operation) { + return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), + VisitOperand(operation, 1).AsFloat()), + Type::HalfFloat}; } template <Type type> - std::string LogicalLessThan(Operation operation) { + Expression LogicalLessThan(Operation operation) { return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); } template <Type type> - std::string LogicalEqual(Operation operation) { + Expression LogicalEqual(Operation operation) { return GenerateBinaryInfix(operation, "==", Type::Bool, type, type); } template <Type type> - std::string LogicalLessEqual(Operation operation) { + Expression LogicalLessEqual(Operation operation) { return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); } template <Type type> - std::string LogicalGreaterThan(Operation operation) { + Expression LogicalGreaterThan(Operation operation) { return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); } template <Type type> - std::string LogicalNotEqual(Operation operation) { + Expression LogicalNotEqual(Operation operation) { return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); } template <Type type> - std::string LogicalGreaterEqual(Operation operation) { + Expression LogicalGreaterEqual(Operation operation) { return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); } - std::string LogicalFIsNan(Operation operation) { - return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false); + Expression LogicalFIsNan(Operation operation) { + return GenerateUnary(operation, "isnan", Type::Bool, Type::Float); } - std::string LogicalAssign(Operation operation) { + Expression LogicalAssign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -1400,78 +1508,80 @@ private: target = GetInternalFlag(flag->GetFlag()); } - code.AddLine("{} = {};", target, Visit(src)); + code.AddLine("{} = {};", target, Visit(src).AsBool()); return {}; } - std::string LogicalAnd(Operation operation) { + Expression LogicalAnd(Operation operation) { return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); } - std::string LogicalOr(Operation operation) { + Expression LogicalOr(Operation operation) { return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); } - std::string LogicalXor(Operation operation) { + Expression LogicalXor(Operation operation) { return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); } - std::string LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false); + Expression LogicalNegate(Operation operation) { + return GenerateUnary(operation, "!", Type::Bool, Type::Bool); } - std::string LogicalPick2(Operation operation) { - const std::string pair = VisitOperand(operation, 0, Type::Bool2); - return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); + Expression LogicalPick2(Operation operation) { + return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), + VisitOperand(operation, 1).AsUint()), + Type::Bool}; } - std::string LogicalAnd2(Operation operation) { + Expression LogicalAnd2(Operation operation) { return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); } template <bool with_nan> - std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { - const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat)}; + Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { + Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, + Type::HalfFloat, Type::HalfFloat); if constexpr (!with_nan) { return comparison; } - return fmt::format("halfFloatNanComparison({}, {}, {})", comparison, - VisitOperand(operation, 0, Type::HalfFloat), - VisitOperand(operation, 1, Type::HalfFloat)); + return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), + VisitOperand(operation, 0).AsHalfFloat(), + VisitOperand(operation, 1).AsHalfFloat()), + Type::Bool2}; } template <bool with_nan> - std::string Logical2HLessThan(Operation operation) { + Expression Logical2HLessThan(Operation operation) { return GenerateHalfComparison<with_nan>(operation, "lessThan"); } template <bool with_nan> - std::string Logical2HEqual(Operation operation) { + Expression Logical2HEqual(Operation operation) { return GenerateHalfComparison<with_nan>(operation, "equal"); } template <bool with_nan> - std::string Logical2HLessEqual(Operation operation) { + Expression Logical2HLessEqual(Operation operation) { return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); } template <bool with_nan> - std::string Logical2HGreaterThan(Operation operation) { + Expression Logical2HGreaterThan(Operation operation) { return GenerateHalfComparison<with_nan>(operation, "greaterThan"); } template <bool with_nan> - std::string Logical2HNotEqual(Operation operation) { + Expression Logical2HNotEqual(Operation operation) { return GenerateHalfComparison<with_nan>(operation, "notEqual"); } template <bool with_nan> - std::string Logical2HGreaterEqual(Operation operation) { + Expression Logical2HGreaterEqual(Operation operation) { return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); } - std::string Texture(Operation operation) { + Expression Texture(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1480,10 +1590,10 @@ private: if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } - return expr + GetSwizzle(meta->element); + return {expr + GetSwizzle(meta->element), Type::Float}; } - std::string TextureLod(Operation operation) { + Expression TextureLod(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); @@ -1492,54 +1602,54 @@ private: if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } - return expr + GetSwizzle(meta->element); + return {expr + GetSwizzle(meta->element), Type::Float}; } - std::string TextureGather(Operation operation) { + Expression TextureGather(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; - return GenerateTexture(operation, "Gather", - {TextureArgument{type, meta->component}, TextureAoffi{}}) + - GetSwizzle(meta->element); + return {GenerateTexture(operation, "Gather", + {TextureArgument{type, meta->component}, TextureAoffi{}}) + + GetSwizzle(meta->element), + Type::Float}; } - std::string TextureQueryDimensions(Operation operation) { + Expression TextureQueryDimensions(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0, Type::Int); + const std::string lod = VisitOperand(operation, 0).AsInt(); switch (meta->element) { case 0: case 1: - return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod, - GetSwizzle(meta->element)); - case 2: - return "0"; + return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), + Type::Int}; case 3: - return fmt::format("itof(textureQueryLevels({}))", sampler); + return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; } UNREACHABLE(); - return "0"; + return {"0", Type::Int}; } - std::string TextureQueryLod(Operation operation) { + Expression TextureQueryLod(Operation operation) { const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); if (meta->element < 2) { - return fmt::format("itof(int(({} * vec2(256)){}))", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)); + return {fmt::format("int(({} * vec2(256)){})", + GenerateTexture(operation, "QueryLod", {}), + GetSwizzle(meta->element)), + Type::Int}; } - return "0"; + return {"0", Type::Int}; } - std::string TexelFetch(Operation operation) { - constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; + Expression TexelFetch(Operation operation) { + constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); UNIMPLEMENTED_IF(meta->sampler.IsArray()); @@ -1552,7 +1662,7 @@ private: expr += constructors.at(operation.GetOperandsCount() - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { - expr += VisitOperand(operation, i, Type::Int); + expr += VisitOperand(operation, i).AsInt(); const std::size_t next = i + 1; if (next == count) expr += ')'; @@ -1565,7 +1675,7 @@ private: if (meta->lod) { expr += ", "; - expr += CastOperand(Visit(meta->lod), Type::Int); + expr += Visit(meta->lod).AsInt(); } expr += ')'; expr += GetSwizzle(meta->element); @@ -1580,11 +1690,11 @@ private: code.AddLine("float {} = {};", tmp, expr); code.AddLine("#endif"); - return tmp; + return {tmp, Type::Float}; } - std::string ImageStore(Operation operation) { - constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + Expression ImageStore(Operation operation) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; const auto meta{std::get<MetaImage>(operation.GetMeta())}; std::string expr = "imageStore("; @@ -1594,7 +1704,7 @@ private: const std::size_t coords_count{operation.GetOperandsCount()}; expr += constructors.at(coords_count - 1); for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i, Type::Int); + expr += VisitOperand(operation, i).AsInt(); if (i + 1 < coords_count) { expr += ", "; } @@ -1605,7 +1715,7 @@ private: UNIMPLEMENTED_IF(values_count != 4); expr += "vec4("; for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)); + expr += Visit(meta.values.at(i)).AsFloat(); if (i + 1 < values_count) { expr += ", "; } @@ -1616,52 +1726,52 @@ private: return {}; } - std::string Branch(Operation operation) { + Expression Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); + code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); code.AddLine("break;"); return {}; } - std::string BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0, Type::Uint); + Expression BranchIndirect(Operation operation) { + const std::string op_a = VisitOperand(operation, 0).AsUint(); code.AddLine("jmp_to = {};", op_a); code.AddLine("break;"); return {}; } - std::string PushFlowStack(Operation operation) { + Expression PushFlowStack(Operation operation) { const auto stack = std::get<MetaStackClass>(operation.GetMeta()); const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), + code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), target->GetValue()); return {}; } - std::string PopFlowStack(Operation operation) { + Expression PopFlowStack(Operation operation) { const auto stack = std::get<MetaStackClass>(operation.GetMeta()); code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); code.AddLine("break;"); return {}; } - std::string Exit(Operation operation) { + Expression Exit(Operation operation) { if (stage != ProgramType::Fragment) { code.AddLine("return;"); return {}; } const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> std::string { + const auto SafeGetRegister = [&](u32 reg) -> Expression { // TODO(Rodrigo): Replace with contains once C++20 releases if (used_registers.find(reg) != used_registers.end()) { - return GetRegister(reg); + return {GetRegister(reg), Type::Float}; } - return "0.0f"; + return {"0.0f", Type::Float}; }; UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); @@ -1674,7 +1784,7 @@ private: for (u32 component = 0; component < 4; ++component) { if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { code.AddLine("FragColor{}[{}] = {};", render_target, component, - SafeGetRegister(current_reg)); + SafeGetRegister(current_reg).AsFloat()); ++current_reg; } } @@ -1683,14 +1793,14 @@ private: if (header.ps.omap.depth) { // The depth output is always 2 registers after the last color output, and current_reg // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1)); + code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); } code.AddLine("return;"); return {}; } - std::string Discard(Operation operation) { + Expression Discard(Operation operation) { // Enclose "discard" in a conditional, so that GLSL compilation does not complain // about unexecuted instructions that may follow this. code.AddLine("if (true) {{"); @@ -1701,7 +1811,7 @@ private: return {}; } - std::string EmitVertex(Operation operation) { + Expression EmitVertex(Operation operation) { ASSERT_MSG(stage == ProgramType::Geometry, "EmitVertex is expected to be used in a geometry shader."); @@ -1712,7 +1822,7 @@ private: return {}; } - std::string EndPrimitive(Operation operation) { + Expression EndPrimitive(Operation operation) { ASSERT_MSG(stage == ProgramType::Geometry, "EndPrimitive is expected to be used in a geometry shader."); @@ -1720,59 +1830,59 @@ private: return {}; } - std::string YNegate(Operation operation) { + Expression YNegate(Operation operation) { // Config pack's third value is Y_NEGATE's state. - return "uintBitsToFloat(config_pack[2])"; + return {"config_pack[2]", Type::Uint}; } template <u32 element> - std::string LocalInvocationId(Operation) { - return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')'; + Expression LocalInvocationId(Operation) { + return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; } template <u32 element> - std::string WorkGroupId(Operation) { - return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; + Expression WorkGroupId(Operation) { + return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; } - std::string BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0, Type::Bool); + Expression BallotThread(Operation operation) { + const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { LOG_ERROR(Render_OpenGL, "Nvidia warp intrinsics are not available and its required by a shader"); // Stub on non-Nvidia devices by simulating all threads voting the same as the active // one. - return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value); + return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; } - return fmt::format("utof(ballotThreadNV({}))", value); + return {fmt::format("ballotThreadNV({})", value), Type::Uint}; } - std::string Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0, Type::Bool); + Expression Vote(Operation operation, const char* func) { + const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are not available and its required by a shader"); // Stub with a warp size of one. - return value; + return {value, Type::Bool}; } - return fmt::format("{}({})", func, value); + return {fmt::format("{}({})", func, value), Type::Bool}; } - std::string VoteAll(Operation operation) { + Expression VoteAll(Operation operation) { return Vote(operation, "allThreadsNV"); } - std::string VoteAny(Operation operation) { + Expression VoteAny(Operation operation) { return Vote(operation, "anyThreadNV"); } - std::string VoteEqual(Operation operation) { + Expression VoteEqual(Operation operation) { if (!device.HasWarpIntrinsics()) { LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are not available and its required by a shader"); // We must return true here since a stub for a theoretical warp size of 1 will always // return an equal result for all its votes. - return "true"; + return {"true", Type::Bool}; } return Vote(operation, "allThreadsEqualNV"); } @@ -1973,8 +2083,8 @@ private: } std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag", - "carry_flag", "overflow_flag"}; + constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", + "overflow_flag"}; const auto index = static_cast<u32>(flag); ASSERT(index < static_cast<u32>(InternalFlag::Amount)); @@ -2022,24 +2132,16 @@ private: std::string GetCommonDeclarations() { return fmt::format( - "#define MAX_CONSTBUFFER_ELEMENTS {}\n" "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" "#define utof uintBitsToFloat\n\n" - "float fromHalf2(vec2 pair) {{\n" - " return utof(packHalf2x16(pair));\n" - "}}\n\n" - "vec2 toHalf2(float value) {{\n" - " return unpackHalf2x16(ftou(value));\n" - "}}\n\n" - "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" + "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" " bvec2 is_nan1 = isnan(pair1);\n" " bvec2 is_nan2 = isnan(pair2);\n" " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " "is_nan2.y);\n" - "}}\n", - MAX_CONSTBUFFER_ELEMENTS); + "}}\n\n"); } ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 969fe9ced..5450feedf 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -341,13 +341,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u64 index{}; u32 type{}; u8 is_bindless{}; + u8 is_read{}; + u8 is_written{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) { + !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || + !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { return {}; } - entry.entries.images.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0); + entry.entries.images.emplace_back(static_cast<u64>(offset), static_cast<std::size_t>(index), + static_cast<Tegra::Shader::ImageType>(type), + is_bindless != 0, is_written != 0, is_read != 0); } u32 global_memory_count{}; @@ -429,7 +432,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) { + !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || + !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || + !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0))) { return false; } } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index f4777d0b0..6eabf4fac 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -34,6 +34,25 @@ bool UpdateTie(T1 current_value, const T2 new_value) { return changed; } +template <typename T> +std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) { + std::optional<std::size_t> first; + std::size_t last; + for (std::size_t i = 0; i < std::size(current_values); ++i) { + if (!UpdateValue(current_values[i], new_values[i])) { + continue; + } + if (!first) { + first = i; + } + last = i; + } + if (!first) { + return std::nullopt; + } + return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1)); +} + void Enable(GLenum cap, bool enable) { if (enable) { glEnable(cap); @@ -134,10 +153,6 @@ OpenGLState::OpenGLState() { logic_op.enabled = false; logic_op.operation = GL_COPY; - for (auto& texture_unit : texture_units) { - texture_unit.Reset(); - } - draw.read_framebuffer = 0; draw.draw_framebuffer = 0; draw.vertex_array = 0; @@ -496,52 +511,20 @@ void OpenGLState::ApplyAlphaTest() const { } void OpenGLState::ApplyTextures() const { - bool has_delta{}; - std::size_t first{}; - std::size_t last{}; - std::array<GLuint, Maxwell::NumTextureSamplers> textures; - - for (std::size_t i = 0; i < std::size(texture_units); ++i) { - const auto& texture_unit = texture_units[i]; - auto& cur_state_texture_unit = cur_state.texture_units[i]; - textures[i] = texture_unit.texture; - if (cur_state_texture_unit.texture == textures[i]) { - continue; - } - cur_state_texture_unit.texture = textures[i]; - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; - } - if (has_delta) { - glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - textures.data() + first); + if (const auto update = UpdateArray(cur_state.textures, textures)) { + glBindTextures(update->first, update->second, textures.data() + update->first); } } void OpenGLState::ApplySamplers() const { - bool has_delta{}; - std::size_t first{}; - std::size_t last{}; - std::array<GLuint, Maxwell::NumTextureSamplers> samplers; - - for (std::size_t i = 0; i < std::size(samplers); ++i) { - samplers[i] = texture_units[i].sampler; - if (cur_state.texture_units[i].sampler == texture_units[i].sampler) { - continue; - } - cur_state.texture_units[i].sampler = texture_units[i].sampler; - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (const auto update = UpdateArray(cur_state.samplers, samplers)) { + glBindSamplers(update->first, update->second, samplers.data() + update->first); } - if (has_delta) { - glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - samplers.data() + first); +} + +void OpenGLState::ApplyImages() const { + if (const auto update = UpdateArray(cur_state.images, images)) { + glBindImageTextures(update->first, update->second, images.data() + update->first); } } @@ -576,6 +559,7 @@ void OpenGLState::Apply() { ApplyLogicOp(); ApplyTextures(); ApplySamplers(); + ApplyImages(); if (dirty.polygon_offset) { ApplyPolygonOffset(); dirty.polygon_offset = false; @@ -606,18 +590,18 @@ void OpenGLState::EmulateViewportWithScissor() { } OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { - for (auto& unit : texture_units) { - if (unit.texture == handle) { - unit.Unbind(); + for (auto& texture : textures) { + if (texture == handle) { + texture = 0; } } return *this; } OpenGLState& OpenGLState::ResetSampler(GLuint handle) { - for (auto& unit : texture_units) { - if (unit.sampler == handle) { - unit.sampler = 0; + for (auto& sampler : samplers) { + if (sampler == handle) { + sampler = 0; } } return *this; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fdf9a8a12..949b13051 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -118,21 +118,9 @@ public: GLenum operation; } logic_op; - // 3 texture units - one for each that is used in PICA fragment shader emulation - struct TextureUnit { - GLuint texture; // GL_TEXTURE_BINDING_2D - GLuint sampler; // GL_SAMPLER_BINDING - - void Unbind() { - texture = 0; - } - - void Reset() { - Unbind(); - sampler = 0; - } - }; - std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{}; struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING @@ -220,6 +208,7 @@ public: void ApplyLogicOp() const; void ApplyTextures() const; void ApplySamplers() const; + void ApplyImages() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; void ApplyAlphaTest() const; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 21324488a..8e13ab38b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -78,6 +78,17 @@ public: /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment, GLenum target) const; + void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); + + void MarkAsModified(u64 tick) { + surface.MarkAsModified(true, tick); + } + GLuint GetTexture() const { if (is_proxy) { return surface.GetTexture(); @@ -89,13 +100,6 @@ public: return surface.GetSurfaceParams(); } - void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); - private: u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, @@ -111,8 +115,8 @@ private: GLenum target{}; OGLTextureView texture_view; - u32 swizzle; - bool is_proxy; + u32 swizzle{}; + bool is_proxy{}; }; class TextureCacheOpenGL final : public TextureCacheBase { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index af9684839..839178152 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -342,7 +342,7 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), }}; - state.texture_units[0].texture = screen_info.display_texture; + state.textures[0] = screen_info.display_texture; // Workaround brigthness problems in SMO by enabling sRGB in the final output // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); @@ -352,7 +352,7 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); // Restore default state state.framebuffer_srgb.enabled = false; - state.texture_units[0].texture = 0; + state.textures[0] = 0; state.AllDirty(); state.Apply(); // Clear sRGB state for the next frame diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index afea33e5f..840694527 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -42,9 +42,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { cond = instr.hsetp2.reg.cond; h_and = instr.hsetp2.reg.h_and; op_b = - UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b, - instr.hsetp2.reg.negate_b), - instr.hsetp2.reg.type_b); + GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), + instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); break; default: UNREACHABLE(); @@ -52,22 +51,22 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { } const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred); + const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); const auto Write = [&](u64 dest, Node src) { SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); }; const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred0; - const u64 second = instr.hsetp2.pred39; + const u64 first = instr.hsetp2.pred3; + const u64 second = instr.hsetp2.pred0; if (h_and) { - const Node joined = Operation(OperationCode::LogicalAnd2, comparison); + Node joined = Operation(OperationCode::LogicalAnd2, comparison); Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, joined)); + Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u))); + Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); + Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); } return pc; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 77151a24b..008109a99 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -61,56 +61,54 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { } const auto type{instr.sust.image_type}; - const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; + auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) + : GetBindlessImage(instr.gpr39, type)}; + image.MarkWrite(); + MetaImage meta{image, values}; const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; bb.push_back(store); break; } default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); + UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); } return pc; } -const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset{static_cast<std::size_t>(image.index.Value())}; +Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { + const auto offset{static_cast<u64>(image.index.Value())}; // If this image has already been used, return the existing mapping. - const auto itr{std::find_if(used_images.begin(), used_images.end(), - [=](const Image& entry) { return entry.GetOffset() == offset; })}; - if (itr != used_images.end()) { - ASSERT(itr->GetType() == type); - return *itr; + const auto it = used_images.find(offset); + if (it != used_images.end()) { + ASSERT(it->second.GetType() == type); + return it->second; } // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - const Image entry{offset, next_index, type}; - return *used_images.emplace(entry).first; + return used_images.emplace(offset, Image{offset, next_index, type}).first->second; } -const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, - Tegra::Shader::ImageType type) { +Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register{GetRegister(reg)}; const auto [base_image, cbuf_index, cbuf_offset]{ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; // If this image has already been used, return the existing mapping. - const auto itr{std::find_if(used_images.begin(), used_images.end(), - [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })}; - if (itr != used_images.end()) { - ASSERT(itr->GetType() == type); - return *itr; + const auto it = used_images.find(cbuf_key); + if (it != used_images.end()) { + ASSERT(it->second.GetType() == type); + return it->second; } // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - const Image entry{cbuf_index, cbuf_offset, next_index, type}; - return *used_images.emplace(entry).first; + return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) + .first->second; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5db9313c4..b29aedce8 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -273,46 +273,64 @@ private: bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; -class Image { +class Image final { public: - explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) + constexpr explicit Image(u64 offset, std::size_t index, Tegra::Shader::ImageType type) : offset{offset}, index{index}, type{type}, is_bindless{false} {} - explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::ImageType type) + constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::ImageType type) : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, is_bindless{true} {} - explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, - bool is_bindless) - : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {} + constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + bool is_bindless, bool is_written, bool is_read) + : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, + is_written{is_written}, is_read{is_read} {} - std::size_t GetOffset() const { + void MarkRead() { + is_read = true; + } + + void MarkWrite() { + is_written = true; + } + + constexpr std::size_t GetOffset() const { return offset; } - std::size_t GetIndex() const { + constexpr std::size_t GetIndex() const { return index; } - Tegra::Shader::ImageType GetType() const { + constexpr Tegra::Shader::ImageType GetType() const { return type; } - bool IsBindless() const { + constexpr bool IsBindless() const { return is_bindless; } - bool operator<(const Image& rhs) const { - return std::tie(offset, index, type, is_bindless) < - std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); + constexpr bool IsRead() const { + return is_read; + } + + constexpr bool IsWritten() const { + return is_written; + } + + constexpr std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; } private: - std::size_t offset{}; + u64 offset{}; std::size_t index{}; Tegra::Shader::ImageType type{}; bool is_bindless{}; + bool is_read{}; + bool is_written{}; }; struct GlobalMemoryBase { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index bcc9b79b6..0f891eace 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -95,7 +95,7 @@ public: return used_samplers; } - const std::set<Image>& GetImages() const { + const std::map<u64, Image>& GetImages() const { return used_images; } @@ -272,10 +272,10 @@ private: bool is_shadow); /// Accesses an image. - const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); /// Access a bindless image sampler. - const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -356,7 +356,7 @@ private: std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; - std::set<Image> used_images; + std::map<u64, Image> used_images; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; bool uses_layer{}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index bcce8d863..5e497e49f 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -195,18 +195,18 @@ public: virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; - void MarkAsModified(const bool is_modified_, const u64 tick) { + void MarkAsModified(bool is_modified_, u64 tick) { is_modified = is_modified_ || is_target; modification_tick = tick; } - void MarkAsRenderTarget(const bool is_target, const u32 index) { - this->is_target = is_target; - this->index = index; + void MarkAsRenderTarget(bool is_target_, u32 index_) { + is_target = is_target_; + index = index_; } - void MarkAsPicked(const bool is_picked) { - this->is_picked = is_picked; + void MarkAsPicked(bool is_picked_) { + is_picked = is_picked_; } bool IsModified() const { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index fd5472451..1e4d3fb79 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -24,55 +24,62 @@ using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; using VideoCore::Surface::SurfaceType; -SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { +namespace { + +SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { switch (type) { - case Tegra::Shader::TextureType::Texture1D: { - if (is_array) - return SurfaceTarget::Texture1DArray; - else - return SurfaceTarget::Texture1D; - } - case Tegra::Shader::TextureType::Texture2D: { - if (is_array) - return SurfaceTarget::Texture2DArray; - else - return SurfaceTarget::Texture2D; - } - case Tegra::Shader::TextureType::Texture3D: { + case Tegra::Shader::TextureType::Texture1D: + return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; + case Tegra::Shader::TextureType::Texture2D: + return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; + case Tegra::Shader::TextureType::Texture3D: ASSERT(!is_array); return SurfaceTarget::Texture3D; - } - case Tegra::Shader::TextureType::TextureCube: { - if (is_array) - return SurfaceTarget::TextureCubeArray; - else - return SurfaceTarget::TextureCubemap; - } - default: { + case Tegra::Shader::TextureType::TextureCube: + return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; + default: UNREACHABLE(); return SurfaceTarget::Texture2D; } +} + +SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { + switch (type) { + case Tegra::Shader::ImageType::Texture1D: + return SurfaceTarget::Texture1D; + case Tegra::Shader::ImageType::TextureBuffer: + return SurfaceTarget::TextureBuffer; + case Tegra::Shader::ImageType::Texture1DArray: + return SurfaceTarget::Texture1DArray; + case Tegra::Shader::ImageType::Texture2D: + return SurfaceTarget::Texture2D; + case Tegra::Shader::ImageType::Texture2DArray: + return SurfaceTarget::Texture2DArray; + case Tegra::Shader::ImageType::Texture3D: + return SurfaceTarget::Texture3D; + default: + UNREACHABLE(); + return SurfaceTarget::Texture2D; } } -namespace { constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); } + } // Anonymous namespace -SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config, +SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry) { SurfaceParams params; - params.is_tiled = config.tic.IsTiled(); - params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, - params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, - params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, - params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; - params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), - params.srgb_conversion); + params.is_tiled = tic.IsTiled(); + params.srgb_conversion = tic.IsSrgbConversionEnabled(); + params.block_width = params.is_tiled ? tic.BlockWidth() : 0, + params.block_height = params.is_tiled ? tic.BlockHeight() : 0, + params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, + params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; + params.pixel_format = + PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); params.type = GetFormatType(params.pixel_format); if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { @@ -92,31 +99,72 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, } params.type = GetFormatType(params.pixel_format); } - params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); + params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. - if (!config.tic.IsBuffer()) { - params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = config.tic.Width(); - params.height = config.tic.Height(); - params.depth = config.tic.Depth(); - params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); + if (tic.IsBuffer()) { + params.target = SurfaceTarget::TextureBuffer; + params.width = tic.Width(); + params.pitch = params.width * params.GetBytesPerPixel(); + params.height = 1; + params.depth = 1; + params.num_levels = 1; + params.emulated_levels = 1; + params.is_layered = false; + } else { + params.target = TextureTypeToSurfaceTarget(entry.GetType(), entry.IsArray()); + params.width = tic.Width(); + params.height = tic.Height(); + params.depth = tic.Depth(); + params.pitch = params.is_tiled ? 0 : tic.Pitch(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } - params.num_levels = config.tic.max_mip_level + 1; + params.num_levels = tic.max_mip_level + 1; params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); params.is_layered = params.IsLayered(); - } else { + } + return params; +} + +SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic, + const VideoCommon::Shader::Image& entry) { + SurfaceParams params; + params.is_tiled = tic.IsTiled(); + params.srgb_conversion = tic.IsSrgbConversionEnabled(); + params.block_width = params.is_tiled ? tic.BlockWidth() : 0, + params.block_height = params.is_tiled ? tic.BlockHeight() : 0, + params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, + params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; + params.pixel_format = + PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); + params.type = GetFormatType(params.pixel_format); + params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); + params.type = GetFormatType(params.pixel_format); + params.target = ImageTypeToSurfaceTarget(entry.GetType()); + // TODO: on 1DBuffer we should use the tic info. + if (tic.IsBuffer()) { params.target = SurfaceTarget::TextureBuffer; - params.width = config.tic.Width(); + params.width = tic.Width(); params.pitch = params.width * params.GetBytesPerPixel(); params.height = 1; params.depth = 1; params.num_levels = 1; params.emulated_levels = 1; params.is_layered = false; + } else { + params.width = tic.Width(); + params.height = tic.Height(); + params.depth = tic.Depth(); + params.pitch = params.is_tiled ? 0 : tic.Pitch(); + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { + params.depth *= 6; + } + params.num_levels = tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); + params.is_layered = params.IsLayered(); } return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e7ef66ee2..c58e7f8a4 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -4,8 +4,6 @@ #pragma once -#include <map> - #include "common/alignment.h" #include "common/bit_util.h" #include "common/cityhash.h" @@ -23,10 +21,13 @@ using VideoCore::Surface::SurfaceCompression; class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. - static SurfaceParams CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config, + static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry); + /// Creates SurfaceCachedParams from an image configuration. + static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic, + const VideoCommon::Shader::Image& entry); + /// Creates SurfaceCachedParams for a depth buffer configuration. static SurfaceParams CreateForDepthBuffer( Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 467696a4c..57a1f5803 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -10,7 +10,7 @@ namespace VideoCommon { std::size_t ViewParams::Hash() const { - return static_cast<std::size_t>(base_layer) ^ static_cast<std::size_t>(num_layers << 16) ^ + return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ (static_cast<std::size_t>(base_level) << 24) ^ (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); } diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index 04ca5639b..b17fd11a9 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -13,8 +13,8 @@ namespace VideoCommon { struct ViewParams { - ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers, - u32 base_level, u32 num_levels) + constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, + u32 num_layers, u32 base_level, u32 num_levels) : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, num_levels{num_levels} {} @@ -22,12 +22,6 @@ struct ViewParams { bool operator==(const ViewParams& rhs) const; - VideoCore::Surface::SurfaceTarget target{}; - u32 base_layer{}; - u32 num_layers{}; - u32 base_level{}; - u32 num_levels{}; - bool IsLayered() const { switch (target) { case VideoCore::Surface::SurfaceTarget::Texture1DArray: @@ -39,13 +33,19 @@ struct ViewParams { return false; } } + + VideoCore::Surface::SurfaceTarget target{}; + u32 base_layer{}; + u32 num_layers{}; + u32 base_level{}; + u32 num_levels{}; }; class ViewBase { public: - ViewBase(const ViewParams& params) : params{params} {} + constexpr explicit ViewBase(const ViewParams& params) : params{params} {} - const ViewParams& GetViewParams() const { + constexpr const ViewParams& GetViewParams() const { return params; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2ec0203d1..877c6635d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -89,14 +89,29 @@ public: } } - TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry) { std::lock_guard lock{mutex}; - const auto gpu_addr{config.tic.Address()}; + const auto gpu_addr{tic.Address()}; if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; + const auto params{SurfaceParams::CreateForTexture(tic, entry)}; + const auto [surface, view] = GetSurface(gpu_addr, params, true, false); + if (guard_samplers) { + sampled_textures.push_back(surface); + } + return view; + } + + TView GetImageSurface(const Tegra::Texture::TICEntry& tic, + const VideoCommon::Shader::Image& entry) { + std::lock_guard lock{mutex}; + const auto gpu_addr{tic.Address()}; + if (!gpu_addr) { + return {}; + } + const auto params{SurfaceParams::CreateForImage(tic, entry)}; const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); |