From 287ae2b9e8ea38642a4c8e36f7863d881d4c0e87 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 13 Nov 2019 00:25:52 -0300 Subject: gl_shader_cache: Specialize local memory size for compute shaders Local memory size in compute shaders was stubbed with an arbitrary size. This commit specializes local memory size from guest GPU parameters. --- src/video_core/engines/kepler_compute.h | 7 ++++++- src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 ++- src/video_core/renderer_opengl/gl_shader_cache.cpp | 5 +++++ .../renderer_opengl/gl_shader_decompiler.cpp | 18 ++++++++---------- .../renderer_opengl/gl_shader_disk_cache.cpp | 4 ++-- src/video_core/renderer_opengl/gl_shader_disk_cache.h | 16 +++++++++------- 6 files changed, 32 insertions(+), 21 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index bd49c6627..c526287b7 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -178,7 +178,12 @@ public: BitField<24, 5, u32> gpr_alloc; }; - INSERT_PADDING_WORDS(0x11); + union { + BitField<0, 20, u32> local_crs_alloc; + BitField<24, 5, u32> sass_version; + }; + + INSERT_PADDING_WORDS(0x10); } launch_description{}; struct { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ebfe52e6d..d890076f8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z, launch_desc.shared_alloc); + launch_desc.block_dim_z, launch_desc.shared_alloc, + launch_desc.local_pos_alloc); std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); 
state.draw.program_pipeline = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 982c4e23a..b23a982d7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy source += fmt::format("shared uint smem[{}];", Common::AlignUp(variant.shared_memory_size, 4) / 4); } + + if (variant.local_memory_size > 0) { + source += fmt::format("#define LOCAL_MEMORY_SIZE {}", + Common::AlignUp(variant.local_memory_size, 4) / 4); + } } source += '\n'; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index fb2ba0905..fe016c05c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -510,10 +510,14 @@ private: } void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 local_memory_size = - stage == ProgramType::Compute ? 
0x400 : header.GetLocalMemorySize(); + if (stage == ProgramType::Compute) { + code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); + code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); + code.AddLine("#endif"); + return; + } + + const u64 local_memory_size = header.GetLocalMemorySize(); if (local_memory_size == 0) { return; } @@ -851,9 +855,6 @@ private: } if (const auto lmem = std::get_if(&*node)) { - if (stage == ProgramType::Compute) { - LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); - } return { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; @@ -1228,9 +1229,6 @@ private: } target = std::move(*output); } else if (const auto lmem = std::get_if(&*dest)) { - if (stage == ProgramType::Compute) { - LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); - } target = { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index d2bb8502a..5ebcbbbba 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,11 +52,11 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 8; +constexpr u32 NativeVersion = 9; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 32); +static_assert(sizeof(ProgramVariant) == 36); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 6f8e51364..28689f6c7 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -64,10 +64,10 @@ struct 
ProgramVariant final { : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} /// Compute constructor. - explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, - u32 shared_memory_size) noexcept + explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, + u32 local_memory_size) noexcept : block_x{block_x}, block_y{static_cast(block_y)}, block_z{static_cast(block_z)}, - shared_memory_size{shared_memory_size} {} + shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} // Graphics specific parameters. BaseBindings base_bindings{}; @@ -78,12 +78,13 @@ struct ProgramVariant final { u16 block_y{}; u16 block_z{}; u32 shared_memory_size{}; + u32 local_memory_size{}; bool operator==(const ProgramVariant& rhs) const noexcept { return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, - shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode, - rhs.block_x, rhs.block_y, rhs.block_z, - rhs.shared_memory_size); + shared_memory_size, local_memory_size) == + std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, + rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size); } bool operator!=(const ProgramVariant& rhs) const noexcept { @@ -133,7 +134,8 @@ struct hash { static_cast(variant.block_x) ^ (static_cast(variant.block_y) << 32) ^ (static_cast(variant.block_z) << 48) ^ - (static_cast(variant.shared_memory_size) << 16); + (static_cast(variant.shared_memory_size) << 16) ^ + (static_cast(variant.local_memory_size) << 36); } }; -- cgit v1.2.3