diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 30 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 40 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 66 | ||||
-rw-r--r-- | src/video_core/shader/decode/memory.cpp | 22 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/surface_base.cpp | 2 |
7 files changed, 120 insertions, 48 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6f98bd827..f443ec0fe 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -227,6 +227,28 @@ enum class AtomicOp : u64 { Exch = 8, }; +enum class GlobalAtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, + SafeAdd = 10, +}; + +enum class GlobalAtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32_FTZ_RN = 3, + F16x2_FTZ_RN = 4, + S64 = 5, +}; + enum class UniformType : u64 { UnsignedByte = 0, SignedByte = 1, @@ -958,6 +980,12 @@ union Instruction { } stg; union { + BitField<52, 4, GlobalAtomicOp> operation; + BitField<49, 3, GlobalAtomicType> type; + BitField<28, 20, s64> offset; + } atom; + + union { BitField<52, 4, AtomicOp> operation; BitField<28, 2, AtomicType> type; BitField<30, 22, s64> offset; @@ -1690,6 +1718,7 @@ public: ST_S, ST, // Store in generic memory STG, // Store in global memory + ATOM, // Atomic operation on global memory ATOMS, // Atomic operation on shared memory AL2P, // Transforms attribute memory into physical memory TEX, @@ -1994,6 +2023,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2996aaf08..a1ac3d7a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1019,7 +1019,6 @@ private: } return {{"gl_ViewportIndex", Type::Int}}; case 3: - UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); return {{"gl_PointSize", Type::Float}}; } return {}; @@ -1858,10 +1857,7 @@ private: template <const std::string_view& opname, Type type> Expression Atomic(Operation operation) { - ASSERT(stage == ShaderType::Compute); - auto& smem = std::get<SmemNode>(*operation[0]); - - return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), + return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), Visit(operation[1]).As(type)), type}; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e95eb069e..d4b81cd87 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -176,6 +176,19 @@ GLint GetSwizzleSource(SwizzleSource source) { return GL_NONE; } +GLenum GetComponent(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8Z24: + return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { if (params.IsBuffer()) { return; @@ -184,7 +197,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); if (params.num_levels == 1) { glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); } @@ -416,11 +429,21 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou if (new_swizzle == swizzle) return; swizzle = new_swizzle; - const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), - GetSwizzleSource(z_source), - GetSwizzleSource(w_source)}; + const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), + GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; const GLuint handle = GetTexture(); - glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + const PixelFormat format = surface.GetSurfaceParams().pixel_format; + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + case PixelFormat::S8Z24: + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + GetComponent(format, x_source == SwizzleSource::R)); + break; + default: + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + break; + } } OGLTextureView CachedSurfaceView::CreateTextureView() const { @@ -529,8 +552,11 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), + static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), + static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), + static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), + buffers, is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b53078721..1ab22251e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1123,15 +1123,7 @@ private: } if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - - Id offset = OpISub(t_uint, real, base); - offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); - return {OpLoad(t_float, - OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), - Type::Float}; + return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; } if (const auto lmem = std::get_if<LmemNode>(&*node)) { @@ -1142,10 +1134,7 @@ private: } if (const auto smem = std::get_if<SmemNode>(&*node)) { - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); - return {OpLoad(t_uint, pointer), Type::Uint}; + return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; } if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { @@ -1339,20 +1328,10 @@ private: target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; + target = {GetSharedMemoryPointer(*smem), Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), - Type::Float}; + target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; } else { UNIMPLEMENTED(); @@ -1804,11 +1783,16 @@ private: return {}; } - Expression UAtomicAdd(Operation operation) { - const auto& smem = std::get<SmemNode>(*operation[0]); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); + Expression AtomicAdd(Operation operation) { + Id pointer; + if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { + pointer = GetSharedMemoryPointer(*smem); + } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { + pointer = GetGlobalMemoryPointer(*gmem); + } else { + UNREACHABLE(); + return {Constant(t_uint, 0), Type::Uint}; + } const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); const Id semantics = Constant(t_uint, 0U); @@ -2243,6 +2227,22 @@ private: return {}; } + Id GetGlobalMemoryPointer(const GmemNode& gmem) { + const Id real = AsUint(Visit(gmem.GetRealAddress())); + const Id base = AsUint(Visit(gmem.GetBaseAddress())); + const Id diff = OpISub(t_uint, real, base); + const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + const Id buffer = global_buffers.at(gmem.GetDescriptor()); + return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); + } + + Id GetSharedMemoryPointer(const SmemNode& smem) { + ASSERT(stage == ShaderType::Compute); + Id address = AsUint(Visit(smem.GetAddress())); + address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); + return OpAccessChain(t_smem_uint, shared_memory, address); + } + static constexpr std::array operation_decompilers = { &SPIRVDecompiler::Assign, @@ -2389,7 +2389,7 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::UAtomicAdd, + &SPIRVDecompiler::AtomicAdd, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, @@ -2485,9 +2485,9 @@ private: Id t_smem_uint{}; - const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); + const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); + Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); const Id t_gmem_struct = MemberDecorate( Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 58744d29a..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,6 +19,8 @@ namespace VideoCommon::Shader { using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; +using Tegra::Shader::GlobalAtomicOp; +using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; @@ -360,6 +362,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::ATOM: { + UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", + static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", + static_cast<int>(instr.atom.type.Value())); + + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::ATOMS: { UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", static_cast<int>(instr.atoms.operation.Value())); @@ -373,7 +393,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node memory = GetSharedMemory(std::move(address)); Node data = GetRegister(instr.gpr20); - Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); + Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 075c7d07c..9af1f0228 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,7 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - UAtomicAdd, /// (smem, uint) -> uint + AtomicAdd, /// (memory, {u}int) -> {u}int Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 829268b4c..84469b7ba 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -135,7 +135,7 @@ std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& i for (u32 level = 0; level < mipmaps; level++) { const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(width, height, layer, level); + result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); } } return result; |