diff options
23 files changed, 616 insertions, 326 deletions
diff --git a/externals/boost b/externals/boost -Subproject 0b920df1c90f4eb84e2f2b10bee8804b0f6472b +Subproject 5e8300b76a627f3a1ba215304e04ead33b5bc23 diff --git a/externals/sirit b/externals/sirit -Subproject e1a6729df7f11e33f6dc0939b18995a57c8bf3d +Subproject 12f40a80324d7c154f19f25c448a5ce27d38cd1 diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 8422d05e0..db189c8e3 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -17,10 +17,10 @@ #include "core/memory.h" namespace Kernel { -namespace { + // Wake up num_to_wake (or all) threads in a vector. -void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) { - auto& system = Core::System::GetInstance(); +void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, + s32 num_to_wake) { // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. std::size_t last = waiting_threads.size(); @@ -32,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3 for (std::size_t i = 0; i < last; i++) { ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb); waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); + RemoveThread(waiting_threads[i]); waiting_threads[i]->SetArbiterWaitAddress(0); waiting_threads[i]->ResumeFromWait(); system.PrepareReschedule(waiting_threads[i]->GetProcessorID()); } } -} // Anonymous namespace AddressArbiter::AddressArbiter(Core::System& system) : system{system} {} AddressArbiter::~AddressArbiter() = default; @@ -184,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); current_thread->SetArbiterWaitAddress(address); + InsertThread(SharedFrom(current_thread)); current_thread->SetStatus(ThreadStatus::WaitArb); current_thread->InvalidateWakeupCallback(); current_thread->WakeAfterDelay(timeout); @@ -192,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { return RESULT_TIMEOUT; } -std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( - VAddr address) const { - - // Retrieve all threads that are waiting for this address. - std::vector<std::shared_ptr<Thread>> threads; - const auto& scheduler = system.GlobalScheduler(); - const auto& thread_list = scheduler.GetThreadList(); +void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { + ASSERT(thread->GetStatus() == ThreadStatus::WaitArb); + RemoveThread(thread); + thread->SetArbiterWaitAddress(0); +} - for (const auto& thread : thread_list) { - if (thread->GetArbiterWaitAddress() == address) { - threads.push_back(thread); +void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { + const VAddr arb_addr = thread->GetArbiterWaitAddress(); + std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; + auto it = thread_list.begin(); + while (it != thread_list.end()) { + const std::shared_ptr<Thread>& current_thread = *it; + if (current_thread->GetPriority() >= thread->GetPriority()) { + thread_list.insert(it, thread); + return; } + ++it; } + thread_list.push_back(std::move(thread)); +} - // Sort them by priority, such that the highest priority ones come first. - std::sort(threads.begin(), threads.end(), - [](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) { - return lhs->GetPriority() < rhs->GetPriority(); - }); +void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { + const VAddr arb_addr = thread->GetArbiterWaitAddress(); + std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; + auto it = thread_list.begin(); + while (it != thread_list.end()) { + const std::shared_ptr<Thread>& current_thread = *it; + if (current_thread.get() == thread.get()) { + thread_list.erase(it); + return; + } + ++it; + } + UNREACHABLE(); +} - return threads; +std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { + std::vector<std::shared_ptr<Thread>> result; + std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; + auto it = thread_list.begin(); + while (it != thread_list.end()) { + std::shared_ptr<Thread> current_thread = *it; + result.push_back(std::move(current_thread)); + ++it; + } + return result; } } // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index 1e1f00e60..386983e54 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -4,7 +4,9 @@ #pragma once +#include <list> #include <memory> +#include <unordered_map> #include <vector> #include "common/common_types.h" @@ -48,6 +50,9 @@ public: /// Waits on an address with a particular arbitration type. ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns); + /// Removes a thread from the container and resets its address arbiter adress to 0 + void HandleWakeupThread(std::shared_ptr<Thread> thread); + private: /// Signals an address being waited on. ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake); @@ -71,8 +76,20 @@ private: // Waits on the given address with a timeout in nanoseconds ResultCode WaitForAddressImpl(VAddr address, s64 timeout); + /// Wake up num_to_wake (or all) threads in a vector. + void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake); + + /// Insert a thread into the address arbiter container + void InsertThread(std::shared_ptr<Thread> thread); + + /// Removes a thread from the address arbiter container + void RemoveThread(std::shared_ptr<Thread> thread); + // Gets the threads waiting on an address. - std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; + std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); + + /// List of threads waiting for a address arbiter + std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; Core::System& system; }; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 0b149067a..1d0783bd3 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -78,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } } - if (thread->GetArbiterWaitAddress() != 0) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitArb); - thread->SetArbiterWaitAddress(0); + if (thread->GetStatus() == ThreadStatus::WaitArb) { + auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter(); + address_arbiter.HandleWakeupThread(thread); } if (resume) { diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 35ff26c39..dbcdb0b88 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1650,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add } /// Signal process wide key -static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, - s32 target) { +static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) { LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", condition_variable_addr, target); @@ -1726,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var system.PrepareReschedule(thread->GetProcessorID()); } } - - return RESULT_SUCCESS; } // Wait for an address (via Address Arbiter) diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 9452e3b6f..29a2cfa9d 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) { FuncReturn(system, retval); } -template <ResultCode func(Core::System&, u64, s32)> -void SvcWrap(Core::System& system) { - FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw); -} - template <ResultCode func(Core::System&, u64, u32)> void SvcWrap(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); @@ -327,6 +322,11 @@ void SvcWrap(Core::System& system) { func(system, static_cast<s64>(Param(system, 0))); } +template <void func(Core::System&, u64, s32)> +void SvcWrap(Core::System& system) { + func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); +} + template <void func(Core::System&, u64, u64)> void SvcWrap(Core::System& system) { func(system, Param(system, 0), Param(system, 1)); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7703a76a3..d6a2cc8b8 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -384,6 +384,15 @@ enum class IsberdMode : u64 { enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; +enum class MembarType : u64 { + CTA = 0, + GL = 1, + SYS = 2, + VC = 3, +}; + +enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; + enum class HalfType : u64 { H0_H1 = 0, F32 = 1, @@ -1283,6 +1292,7 @@ union Instruction { BitField<50, 1, u64> dc_flag; BitField<51, 1, u64> aoffi_flag; BitField<52, 2, u64> component; + BitField<55, 1, u64> fp16_flag; bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { @@ -1546,6 +1556,11 @@ union Instruction { } isberd; union { + BitField<8, 2, MembarType> type; + BitField<0, 2, MembarUnknown> unknown; + } membar; + + union { BitField<48, 1, u64> signed_a; BitField<38, 1, u64> is_byte_chunk_a; BitField<36, 2, VideoType> type_a; @@ -1669,6 +1684,7 @@ public: IPA, OUT_R, // Emit vertex/primitive ISBERD, + MEMBAR, VMAD, VSETP, FFMA_IMM, // Fused Multiply and Add @@ -1930,7 +1946,7 @@ private: INST("111000100100----", Id::BRA, Type::Flow, "BRA"), INST("111000100101----", Id::BRX, Type::Flow, "BRX"), INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100---", Id::BRK, Type::Flow, "BRK"), + INST("111000110100----", Id::BRK, Type::Flow, "BRK"), INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), @@ -1957,7 +1973,7 @@ private: INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), + INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), @@ -1969,6 +1985,7 @@ private: INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), + INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index fc6ecb899..d01db97da 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -5,6 +5,7 @@ #include <mutex> #include <boost/icl/interval_map.hpp> +#include <boost/range/iterator_range.hpp> #include "common/assert.h" #include "common/common_types.h" diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 413d8546b..1a2e2a9f7 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,6 +5,7 @@ #include <algorithm> #include <array> #include <cstddef> +#include <cstring> #include <optional> #include <vector> @@ -134,11 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin Device::Device() : base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); + const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; const bool is_intel = vendor == "Intel"; + const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); @@ -152,7 +155,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} { has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); - has_broken_compute = is_intel; + has_broken_compute = is_intel_proprietary; has_fast_buffer_sub_data = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 88d78d2ad..f20967d85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -277,6 +277,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } + // Currently this stages are not supported in the OpenGL backend. + // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL + if (program == Maxwell::ShaderProgram::TesselationControl) { + continue; + } else if (program == Maxwell::ShaderProgram::TesselationEval) { + continue; + } + Shader shader{shader_cache.GetStageProgram(program)}; // Stage indices are 0 - 5 diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 370bdf052..270a9dc2b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -281,11 +281,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp if (variant.shared_memory_size > 0) { // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool // avoids out of bound stores. Find out why shared memory size is being invalid. - source += fmt::format("shared uint smem[{}];", variant.shared_memory_size); + source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); } if (variant.local_memory_size > 0) { - source += fmt::format("#define LOCAL_MEMORY_SIZE {}", + source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", Common::AlignUp(variant.local_memory_size, 4) / 4); } } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 9700c2ebe..d1ae4be6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1076,7 +1076,7 @@ private: } std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector<TextureIR>& extras) { + const std::vector<TextureIR>& extras, bool sepparate_dc = false) { constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); @@ -1091,7 +1091,8 @@ private: expr += "Offset"; } expr += '(' + GetSampler(meta->sampler) + ", "; - expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); + expr += coord_constructors.at(count + (has_array ? 1 : 0) + + (has_shadow && !sepparate_dc ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { expr += Visit(operation[i]).AsFloat(); @@ -1104,9 +1105,14 @@ private: expr += ", float(" + Visit(meta->array).AsInt() + ')'; } if (has_shadow) { - expr += ", " + Visit(meta->depth_compare).AsFloat(); + if (sepparate_dc) { + expr += "), " + Visit(meta->depth_compare).AsFloat(); + } else { + expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; + } + } else { + expr += ')'; } - expr += ')'; for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { @@ -1706,10 +1712,17 @@ private: ASSERT(meta); const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; - return {GenerateTexture(operation, "Gather", - {TextureAoffi{}, TextureArgument{type, meta->component}}) + - GetSwizzle(meta->element), - Type::Float}; + if (meta->sampler.IsShadow()) { + return {GenerateTexture(operation, "Gather", {TextureAoffi{}}, true) + + GetSwizzle(meta->element), + Type::Float}; + } else { + return {GenerateTexture(operation, "Gather", + {TextureAoffi{}, TextureArgument{type, meta->component}}, + false) + + GetSwizzle(meta->element), + Type::Float}; + } } Expression TextureQueryDimensions(Operation operation) { @@ -1992,6 +2005,11 @@ private: return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; } + Expression MemoryBarrierGL(Operation) { + code.AddLine("memoryBarrier();"); + return {}; + } + struct Func final { Func() = delete; ~Func() = delete; @@ -2173,6 +2191,8 @@ private: &GLSLDecompiler::ThreadId, &GLSLDecompiler::ShuffleIndexed, + + &GLSLDecompiler::MemoryBarrierGL, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a57a564f7..bba16afaf 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,19 +24,21 @@ namespace OpenGL { -static const char vertex_shader[] = R"( -#version 150 core +namespace { -in vec2 vert_position; -in vec2 vert_tex_coord; -out vec2 frag_tex_coord; +constexpr char vertex_shader[] = R"( +#version 430 core + +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; +layout (location = 0) out vec2 frag_tex_coord; // This is a truncated 3x3 matrix for 2D transformations: // The upper-left 2x2 submatrix performs scaling/rotation/mirroring. // The third column performs translation. // The third row could be used for projection, which we don't need in 2D. It hence is assumed to // implicitly be [0, 0, 1] -uniform mat3x2 modelview_matrix; +layout (location = 0) uniform mat3x2 modelview_matrix; void main() { // Multiply input position by the rotscale part of the matrix and then manually translate by @@ -47,34 +49,29 @@ void main() { } )"; -static const char fragment_shader[] = R"( -#version 150 core +constexpr char fragment_shader[] = R"( +#version 430 core -in vec2 frag_tex_coord; -out vec4 color; +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; -uniform sampler2D color_texture; +layout (binding = 0) uniform sampler2D color_texture; void main() { - // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to - // support more framebuffer pixel formats. color = texture(color_texture, frag_tex_coord); } )"; -/** - * Vertex structure that the drawn screen rectangles are composed of. - */ +constexpr GLint PositionLocation = 0; +constexpr GLint TexCoordLocation = 1; +constexpr GLint ModelViewMatrixLocation = 0; + struct ScreenRectVertex { - ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) { - position[0] = x; - position[1] = y; - tex_coord[0] = u; - tex_coord[1] = v; - } + constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) + : position{{x, y}}, tex_coord{{u, v}} {} - GLfloat position[2]; - GLfloat tex_coord[2]; + std::array<GLfloat, 2> position; + std::array<GLfloat, 2> tex_coord; }; /** @@ -84,18 +81,82 @@ struct ScreenRectVertex { * The projection part of the matrix is trivial, hence these operations are represented * by a 3x2 matrix. */ -static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { +std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) { std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order // clang-format off - matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; - matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; // Last matrix row is implicitly assumed to be [0, 0, 1]. // clang-format on return matrix; } +const char* GetSource(GLenum source) { + switch (source) { + case GL_DEBUG_SOURCE_API: + return "API"; + case GL_DEBUG_SOURCE_WINDOW_SYSTEM: + return "WINDOW_SYSTEM"; + case GL_DEBUG_SOURCE_SHADER_COMPILER: + return "SHADER_COMPILER"; + case GL_DEBUG_SOURCE_THIRD_PARTY: + return "THIRD_PARTY"; + case GL_DEBUG_SOURCE_APPLICATION: + return "APPLICATION"; + case GL_DEBUG_SOURCE_OTHER: + return "OTHER"; + default: + UNREACHABLE(); + return "Unknown source"; + } +} + +const char* GetType(GLenum type) { + switch (type) { + case GL_DEBUG_TYPE_ERROR: + return "ERROR"; + case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR: + return "DEPRECATED_BEHAVIOR"; + case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR: + return "UNDEFINED_BEHAVIOR"; + case GL_DEBUG_TYPE_PORTABILITY: + return "PORTABILITY"; + case GL_DEBUG_TYPE_PERFORMANCE: + return "PERFORMANCE"; + case GL_DEBUG_TYPE_OTHER: + return "OTHER"; + case GL_DEBUG_TYPE_MARKER: + return "MARKER"; + default: + UNREACHABLE(); + return "Unknown type"; + } +} + +void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, + const GLchar* message, const void* user_param) { + const char format[] = "{} {} {}: {}"; + const char* const str_source = GetSource(source); + const char* const str_type = GetType(type); + + switch (severity) { + case GL_DEBUG_SEVERITY_HIGH: + LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message); + break; + case GL_DEBUG_SEVERITY_MEDIUM: + LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message); + break; + case GL_DEBUG_SEVERITY_NOTIFICATION: + case GL_DEBUG_SEVERITY_LOW: + LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); + break; + } +} + +} // Anonymous namespace + RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} @@ -138,9 +199,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { prev_state.Apply(); } -/** - * Loads framebuffer from emulated memory into the active OpenGL texture. - */ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { // Framebuffer orientation handling framebuffer_transform_flags = framebuffer.transform_flags; @@ -181,19 +239,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } -/** - * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can - * be 1x1 but will stretch across whatever it's rendered on. - */ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture) { const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r}; glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); } -/** - * Initializes the OpenGL state and creates persistent objects. - */ void RendererOpenGL::InitOpenGLObjects() { glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); @@ -203,10 +254,6 @@ void RendererOpenGL::InitOpenGLObjects() { state.draw.shader_program = shader.handle; state.AllDirty(); state.Apply(); - uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); - uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); - attrib_position = glGetAttribLocation(shader.handle, "vert_position"); - attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord"); // Generate VBO handle for drawing vertex_buffer.Create(); @@ -217,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() { // Attach vertex data to VAO glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); - glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE, + glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE, offsetof(ScreenRectVertex, position)); - glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, + glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE, offsetof(ScreenRectVertex, tex_coord)); - glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0); - glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0); - glEnableVertexArrayAttrib(vertex_array.handle, attrib_position); - glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord); + glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0); + glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0); + glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation); + glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation); glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); @@ -331,18 +378,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, static_cast<f32>(screen_info.texture.height); } - std::array<ScreenRectVertex, 4> vertices = {{ + const std::array vertices = { ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v), ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v), ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v), ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), - }}; + }; state.textures[0] = screen_info.display_texture; state.framebuffer_srgb.enabled = screen_info.display_srgb; state.AllDirty(); state.Apply(); - glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); + glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); // Restore default state state.framebuffer_srgb.enabled = false; @@ -351,9 +398,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, state.Apply(); } -/** - * Draws the emulated screens to the emulator window. - */ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { if (renderer_settings.set_background_color) { // Update background color before drawing @@ -367,21 +411,17 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); // Set projection matrix - std::array<GLfloat, 3 * 2> ortho_matrix = - MakeOrthographicMatrix((float)layout.width, (float)layout.height); - glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); - - // Bind texture in Texture Unit 0 - glActiveTexture(GL_TEXTURE0); - glUniform1i(uniform_color_texture, 0); + const std::array ortho_matrix = + MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); + glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); - DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top, - (float)screen.GetWidth(), (float)screen.GetHeight()); + DrawScreenTriangles(screen_info, static_cast<float>(screen.left), + static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()), + static_cast<float>(screen.GetHeight())); m_current_frame++; } -/// Updates the framerate void RendererOpenGL::UpdateFramerate() {} void RendererOpenGL::CaptureScreenshot() { @@ -418,63 +458,6 @@ void RendererOpenGL::CaptureScreenshot() { renderer_settings.screenshot_requested = false; } -static const char* GetSource(GLenum source) { -#define RET(s) \ - case GL_DEBUG_SOURCE_##s: \ - return #s - switch (source) { - RET(API); - RET(WINDOW_SYSTEM); - RET(SHADER_COMPILER); - RET(THIRD_PARTY); - RET(APPLICATION); - RET(OTHER); - default: - UNREACHABLE(); - return "Unknown source"; - } -#undef RET -} - -static const char* GetType(GLenum type) { -#define RET(t) \ - case GL_DEBUG_TYPE_##t: \ - return #t - switch (type) { - RET(ERROR); - RET(DEPRECATED_BEHAVIOR); - RET(UNDEFINED_BEHAVIOR); - RET(PORTABILITY); - RET(PERFORMANCE); - RET(OTHER); - RET(MARKER); - default: - UNREACHABLE(); - return "Unknown type"; - } -#undef RET -} - -static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, - GLsizei length, const GLchar* message, const void* user_param) { - const char format[] = "{} {} {}: {}"; - const char* const str_source = GetSource(source); - const char* const str_type = GetType(type); - - switch (severity) { - case GL_DEBUG_SEVERITY_HIGH: - LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message); - break; - case GL_DEBUG_SEVERITY_MEDIUM: - LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message); - break; - case GL_DEBUG_SEVERITY_NOTIFICATION: - case GL_DEBUG_SEVERITY_LOW: - LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); - break; - } -} - bool RendererOpenGL::Init() { Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; @@ -495,7 +478,6 @@ bool RendererOpenGL::Init() { return true; } -/// Shutdown the renderer void RendererOpenGL::ShutDown() {} } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cf26628ca..b56328a7f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -59,21 +59,31 @@ public: void ShutDown() override; private: + /// Initializes the OpenGL state and creates persistent objects. void InitOpenGLObjects(); + void AddTelemetryFields(); + void CreateRasterizer(); void ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer); + + /// Draws the emulated screens to the emulator window. void DrawScreen(const Layout::FramebufferLayout& layout); + void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); + + /// Updates the framerate. void UpdateFramerate(); void CaptureScreenshot(); - // Loads framebuffer from emulated memory into the display information structure + /// Loads framebuffer from emulated memory into the active OpenGL texture. void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); - // Fills active OpenGL texture with the given RGBA color. + + /// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture + /// can be 1x1 but will stretch across whatever it's rendered on. void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); @@ -94,14 +104,6 @@ private: /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; - // Shader uniform location indices - GLuint uniform_modelview_matrix; - GLuint uniform_color_texture; - - // Shader attribute input indices - GLuint attrib_position; - GLuint attrib_tex_coord; - /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; Common::Rectangle<int> framebuffer_crop_rect; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 7f0eb6b74..000e3616d 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -44,7 +44,8 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt return {}; } -vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { +vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, + Tegra::Texture::TextureFilter filter) { switch (wrap_mode) { case Tegra::Texture::WrapMode::Wrap: return vk::SamplerAddressMode::eRepeat; @@ -55,10 +56,15 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { case Tegra::Texture::WrapMode::Border: return vk::SamplerAddressMode::eClampToBorder; case Tegra::Texture::WrapMode::Clamp: - // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use - // eClampToBorder to get the border color of the texture, and then sample the edge to - // manually mix them. However the shader part of this is not yet implemented. - return vk::SamplerAddressMode::eClampToBorder; + // TODO(Rodrigo): Emulate GL_CLAMP properly + switch (filter) { + case Tegra::Texture::TextureFilter::Nearest: + return vk::SamplerAddressMode::eClampToEdge; + case Tegra::Texture::TextureFilter::Linear: + return vk::SamplerAddressMode::eClampToBorder; + } + UNREACHABLE(); + return vk::SamplerAddressMode::eClampToEdge; case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: return vk::SamplerAddressMode::eMirrorClampToEdge; case Tegra::Texture::WrapMode::MirrorOnceBorder: @@ -96,106 +102,140 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar } // namespace Sampler +namespace { + +enum : u32 { Attachable = 1, Storage = 2 }; + struct FormatTuple { vk::Format format; ///< Vulkan format - bool attachable; ///< True when this format can be used as an attachment -}; - -static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {vk::Format::eA8B8G8R8UnormPack32, true}, // ABGR8U - {vk::Format::eUndefined, false}, // ABGR8S - {vk::Format::eUndefined, false}, // ABGR8UI - {vk::Format::eB5G6R5UnormPack16, false}, // B5G6R5U - {vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U - {vk::Format::eUndefined, false}, // A1B5G5R5U - {vk::Format::eR8Unorm, true}, // R8U - {vk::Format::eUndefined, false}, // R8UI - {vk::Format::eUndefined, false}, // RGBA16F - {vk::Format::eUndefined, false}, // RGBA16U - {vk::Format::eUndefined, false}, // RGBA16UI - {vk::Format::eUndefined, false}, // R11FG11FB10F - {vk::Format::eUndefined, false}, // RGBA32UI - {vk::Format::eBc1RgbaUnormBlock, false}, // DXT1 - {vk::Format::eBc2UnormBlock, false}, // DXT23 - {vk::Format::eBc3UnormBlock, false}, // DXT45 - {vk::Format::eBc4UnormBlock, false}, // DXN1 - {vk::Format::eUndefined, false}, // DXN2UNORM - {vk::Format::eUndefined, false}, // DXN2SNORM - {vk::Format::eUndefined, false}, // BC7U - {vk::Format::eUndefined, false}, // BC6H_UF16 - {vk::Format::eUndefined, false}, // BC6H_SF16 - {vk::Format::eUndefined, false}, // ASTC_2D_4X4 - {vk::Format::eUndefined, false}, // BGRA8 - {vk::Format::eUndefined, false}, // RGBA32F - {vk::Format::eUndefined, false}, // RG32F - {vk::Format::eUndefined, false}, // R32F - {vk::Format::eUndefined, false}, // R16F - {vk::Format::eUndefined, false}, // R16U - {vk::Format::eUndefined, false}, // R16S - {vk::Format::eUndefined, false}, // R16UI - {vk::Format::eUndefined, false}, // R16I - {vk::Format::eUndefined, false}, // RG16 - {vk::Format::eUndefined, false}, // RG16F - {vk::Format::eUndefined, false}, // RG16UI - {vk::Format::eUndefined, false}, // RG16I - {vk::Format::eUndefined, false}, // RG16S - {vk::Format::eUndefined, false}, // RGB32F - {vk::Format::eA8B8G8R8SrgbPack32, true}, // RGBA8_SRGB - {vk::Format::eUndefined, false}, // RG8U - {vk::Format::eUndefined, false}, // RG8S - {vk::Format::eUndefined, false}, // RG32UI - {vk::Format::eUndefined, false}, // RGBX16F - {vk::Format::eUndefined, false}, // R32UI - {vk::Format::eUndefined, false}, // ASTC_2D_8X8 - {vk::Format::eUndefined, false}, // ASTC_2D_8X5 - {vk::Format::eUndefined, false}, // ASTC_2D_5X4 - - // Compressed sRGB formats - {vk::Format::eUndefined, false}, // BGRA8_SRGB - {vk::Format::eUndefined, false}, // DXT1_SRGB - {vk::Format::eUndefined, false}, // DXT23_SRGB - {vk::Format::eUndefined, false}, // DXT45_SRGB - {vk::Format::eUndefined, false}, // BC7U_SRGB - {vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB - {vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB - {vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB - {vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB - {vk::Format::eUndefined, false}, // ASTC_2D_5X5 - {vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB - {vk::Format::eUndefined, false}, // ASTC_2D_10X8 - {vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB + int usage; ///< Describes image format usage +} constexpr tex_format_tuples[] = { + {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage}, // ABGR8U + {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage}, // ABGR8S + {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI + {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U + {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U + {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage}, // A1B5G5R5U (flipped with swizzle) + {vk::Format::eR8Unorm, Attachable | Storage}, // R8U + {vk::Format::eR8Uint, Attachable | Storage}, // R8UI + {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F + {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U + {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI + {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F + {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI + {vk::Format::eBc1RgbaUnormBlock, {}}, // DXT1 + {vk::Format::eBc2UnormBlock, {}}, // DXT23 + {vk::Format::eBc3UnormBlock, {}}, // DXT45 + {vk::Format::eBc4UnormBlock, {}}, // DXN1 + {vk::Format::eBc5UnormBlock, {}}, // DXN2UNORM + {vk::Format::eBc5SnormBlock, {}}, // DXN2SNORM + {vk::Format::eBc7UnormBlock, {}}, // BC7U + {vk::Format::eBc6HUfloatBlock, {}}, // BC6H_UF16 + {vk::Format::eBc6HSfloatBlock, {}}, // BC6H_SF16 + {vk::Format::eAstc4x4UnormBlock, {}}, // ASTC_2D_4X4 + {vk::Format::eB8G8R8A8Unorm, {}}, // BGRA8 + {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage}, // RGBA32F + {vk::Format::eR32G32Sfloat, Attachable | Storage}, // RG32F + {vk::Format::eR32Sfloat, Attachable | Storage}, // R32F + {vk::Format::eR16Sfloat, Attachable | Storage}, // R16F + {vk::Format::eR16Unorm, Attachable | Storage}, // R16U + {vk::Format::eUndefined, {}}, // R16S + {vk::Format::eUndefined, {}}, // R16UI + {vk::Format::eUndefined, {}}, // R16I + {vk::Format::eR16G16Unorm, Attachable | Storage}, // RG16 + {vk::Format::eR16G16Sfloat, Attachable | Storage}, // RG16F + {vk::Format::eUndefined, {}}, // RG16UI + {vk::Format::eUndefined, {}}, // RG16I + {vk::Format::eR16G16Snorm, Attachable | Storage}, // RG16S + {vk::Format::eUndefined, {}}, // RGB32F + {vk::Format::eR8G8B8A8Srgb, Attachable}, // RGBA8_SRGB + {vk::Format::eR8G8Unorm, Attachable | Storage}, // RG8U + {vk::Format::eR8G8Snorm, Attachable | Storage}, // RG8S + {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI + {vk::Format::eUndefined, {}}, // RGBX16F + {vk::Format::eR32Uint, Attachable | Storage}, // R32UI + {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 + {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 + {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 + {vk::Format::eUndefined, {}}, // BGRA8_SRGB + {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB + {vk::Format::eUndefined, {}}, // DXT23_SRGB + {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB + {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB + {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U + {vk::Format::eAstc4x4SrgbBlock, {}}, // ASTC_2D_4X4_SRGB + {vk::Format::eAstc8x8SrgbBlock, {}}, // ASTC_2D_8X8_SRGB + {vk::Format::eAstc8x5SrgbBlock, {}}, // ASTC_2D_8X5_SRGB + {vk::Format::eAstc5x4SrgbBlock, {}}, // ASTC_2D_5X4_SRGB + {vk::Format::eAstc5x5UnormBlock, {}}, // ASTC_2D_5X5 + {vk::Format::eAstc5x5SrgbBlock, {}}, // ASTC_2D_5X5_SRGB + {vk::Format::eAstc10x8UnormBlock, {}}, // ASTC_2D_10X8 + {vk::Format::eAstc10x8SrgbBlock, {}}, // ASTC_2D_10X8_SRGB + {vk::Format::eAstc6x6UnormBlock, {}}, // ASTC_2D_6X6 + {vk::Format::eAstc6x6SrgbBlock, {}}, // ASTC_2D_6X6_SRGB + {vk::Format::eAstc10x10UnormBlock, {}}, // ASTC_2D_10X10 + {vk::Format::eAstc10x10SrgbBlock, {}}, // ASTC_2D_10X10_SRGB + {vk::Format::eAstc12x12UnormBlock, {}}, // ASTC_2D_12X12 + {vk::Format::eAstc12x12SrgbBlock, {}}, // ASTC_2D_12X12_SRGB + {vk::Format::eAstc8x6UnormBlock, {}}, // ASTC_2D_8X6 + {vk::Format::eAstc8x6SrgbBlock, {}}, // ASTC_2D_8X6_SRGB + {vk::Format::eAstc6x5UnormBlock, {}}, // ASTC_2D_6X5 + {vk::Format::eAstc6x5SrgbBlock, {}}, // ASTC_2D_6X5_SRGB + {vk::Format::eE5B9G9R9UfloatPack32, {}}, // E5B9G9R9F // Depth formats - {vk::Format::eD32Sfloat, true}, // Z32F - {vk::Format::eD16Unorm, true}, // Z16 + {vk::Format::eD32Sfloat, Attachable}, // Z32F + {vk::Format::eD16Unorm, Attachable}, // Z16 // DepthStencil formats - {vk::Format::eD24UnormS8Uint, true}, // Z24S8 - {vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated) - {vk::Format::eUndefined, false}, // Z32FS8 -}}; + {vk::Format::eD24UnormS8Uint, Attachable}, // Z24S8 + {vk::Format::eD24UnormS8Uint, Attachable}, // S8Z24 (emulated) + {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8 +}; +static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); -static constexpr bool IsZetaFormat(PixelFormat pixel_format) { +constexpr bool IsZetaFormat(PixelFormat pixel_format) { return pixel_format >= PixelFormat::MaxColorFormat && pixel_format < PixelFormat::MaxDepthStencilFormat; } -std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, - PixelFormat pixel_format) { - ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); +} // Anonymous namespace + +FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) { + ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); - const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)]; - UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined, - "Unimplemented texture format with pixel format={}", - static_cast<u32>(pixel_format)); + auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; + if (tuple.format == vk::Format::eUndefined) { + UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", + static_cast<u32>(pixel_format)); + return {vk::Format::eA8B8G8R8UnormPack32, true, true}; + } + + // Use ABGR8 on hardware that doesn't support ASTC natively + if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { + tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) + ? vk::Format::eA8B8G8R8SrgbPack32 + : vk::Format::eA8B8G8R8UnormPack32; + } + const bool attachable = tuple.usage & Attachable; + const bool storage = tuple.usage & Storage; - auto usage = vk::FormatFeatureFlagBits::eSampledImage | - vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc; - if (tuple.attachable) { - usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment - : vk::FormatFeatureFlagBits::eColorAttachment; + vk::FormatFeatureFlags usage; + if (format_type == FormatType::Buffer) { + usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer | + vk::FormatFeatureFlagBits::eUniformTexelBuffer; + } else { + usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst | + vk::FormatFeatureFlagBits::eTransferSrc; + if (attachable) { + usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment + : vk::FormatFeatureFlagBits::eColorAttachment; + } + if (storage) { + usage |= vk::FormatFeatureFlagBits::eStorageImage; + } } - return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; + return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { @@ -215,7 +255,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { return {}; } -vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { +vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, + Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: return vk::PrimitiveTopology::ePointList; @@ -227,6 +268,13 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return vk::PrimitiveTopology::eTriangleList; case Maxwell::PrimitiveTopology::TriangleStrip: return vk::PrimitiveTopology::eTriangleStrip; + case Maxwell::PrimitiveTopology::TriangleFan: + return vk::PrimitiveTopology::eTriangleFan; + case Maxwell::PrimitiveTopology::Quads: + // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases + return vk::PrimitiveTopology::eTriangleList; + case Maxwell::PrimitiveTopology::Patches: + return vk::PrimitiveTopology::ePatchList; default: UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); return {}; @@ -236,37 +284,111 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { switch (type) { case Maxwell::VertexAttribute::Type::SignedNorm: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return vk::Format::eR8Snorm; + case Maxwell::VertexAttribute::Size::Size_8_8: + return vk::Format::eR8G8Snorm; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return vk::Format::eR8G8B8Snorm; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Snorm; + case Maxwell::VertexAttribute::Size::Size_16: + return vk::Format::eR16Snorm; + case Maxwell::VertexAttribute::Size::Size_16_16: + return vk::Format::eR16G16Snorm; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return vk::Format::eR16G16B16Snorm; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return vk::Format::eR16G16B16A16Snorm; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return vk::Format::eA2B10G10R10SnormPack32; + default: + break; + } break; case Maxwell::VertexAttribute::Type::UnsignedNorm: switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return vk::Format::eR8Unorm; + case Maxwell::VertexAttribute::Size::Size_8_8: + return vk::Format::eR8G8Unorm; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return vk::Format::eR8G8B8Unorm; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return vk::Format::eR8G8B8A8Unorm; + case Maxwell::VertexAttribute::Size::Size_16: + return vk::Format::eR16Unorm; + case Maxwell::VertexAttribute::Size::Size_16_16: + return vk::Format::eR16G16Unorm; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return vk::Format::eR16G16B16Unorm; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return vk::Format::eR16G16B16A16Unorm; default: break; } break; case Maxwell::VertexAttribute::Type::SignedInt: - break; + switch (size) { + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return vk::Format::eR16G16B16A16Sint; + case Maxwell::VertexAttribute::Size::Size_8: + return vk::Format::eR8Sint; + case Maxwell::VertexAttribute::Size::Size_8_8: + return vk::Format::eR8G8Sint; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return vk::Format::eR8G8B8Sint; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Sint; + case Maxwell::VertexAttribute::Size::Size_32: + return vk::Format::eR32Sint; + default: + break; + } case Maxwell::VertexAttribute::Type::UnsignedInt: switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return vk::Format::eR8Uint; + case Maxwell::VertexAttribute::Size::Size_8_8: + return vk::Format::eR8G8Uint; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return vk::Format::eR8G8B8Uint; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Uint; case Maxwell::VertexAttribute::Size::Size_32: return vk::Format::eR32Uint; default: break; } case Maxwell::VertexAttribute::Type::UnsignedScaled: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8_8: + return vk::Format::eR8G8Uscaled; + default: + break; + } + break; case Maxwell::VertexAttribute::Type::SignedScaled: break; case Maxwell::VertexAttribute::Type::Float: switch (size) { - case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return vk::Format::eR32G32B32A32Sfloat; - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return vk::Format::eR32G32B32Sfloat; - case Maxwell::VertexAttribute::Size::Size_32_32: - return vk::Format::eR32G32Sfloat; case Maxwell::VertexAttribute::Size::Size_32: return vk::Format::eR32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32_32: + return vk::Format::eR32G32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return vk::Format::eR32G32B32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return vk::Format::eR32G32B32A32Sfloat; + case Maxwell::VertexAttribute::Size::Size_16: + return vk::Format::eR16Sfloat; + case Maxwell::VertexAttribute::Size::Size_16_16: + return vk::Format::eR16G16Sfloat; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return vk::Format::eR16G16B16Sfloat; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return vk::Format::eR16G16B16A16Sfloat; default: break; } @@ -308,11 +430,14 @@ vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { return {}; } -vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) { +vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { switch (index_format) { case Maxwell::IndexFormat::UnsignedByte: - UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format"); - return vk::IndexType::eUint16; + if (!device.IsExtIndexTypeUint8Supported()) { + UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); + return vk::IndexType::eUint16; + } + return vk::IndexType::eUint8EXT; case Maxwell::IndexFormat::UnsignedShort: return vk::IndexType::eUint16; case Maxwell::IndexFormat::UnsignedInt: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 904a32e01..1534b738b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -4,7 +4,6 @@ #pragma once -#include <utility> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/declarations.h" @@ -23,24 +22,31 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter); vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); -vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode); +vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, + Tegra::Texture::TextureFilter filter); vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); } // namespace Sampler -std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, - PixelFormat pixel_format); +struct FormatInfo { + vk::Format format; + bool attachable; + bool storage; +}; + +FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); -vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); +vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device, + Maxwell::PrimitiveTopology topology); vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison); -vk::IndexType IndexFormat(Maxwell::IndexFormat index_format); +vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op); diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 801826d3d..1ce583f75 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -46,9 +46,10 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), MaxwellToVK::Sampler::Filter(tsc.min_filter), MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy, - max_anisotropy, tsc.depth_compare_enabled, + MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter), + MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter), + MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), + has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), unnormalized_coords); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8ad89b58a..6227bc70b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1971,6 +1971,18 @@ private: return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; } + Expression MemoryBarrierGL(Operation) { + const auto scope = spv::Scope::Device; + const auto semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | + spv::MemorySemanticsMask::WorkgroupMemory | + spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; + + OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)), + Constant(t_uint, static_cast<u32>(semantics))); + return {}; + } + Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { const Id id = OpVariable(type, storage); Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin)); @@ -2374,6 +2386,8 @@ private: &SPIRVDecompiler::ThreadId, &SPIRVDecompiler::ShuffleIndexed, + + &SPIRVDecompiler::MemoryBarrierGL, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 5c802886b..7321698b2 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -257,6 +257,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); break; } + case OpCode::Id::MEMBAR: { + UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); + UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); + bb.push_back(Operation(OperationCode::MemoryBarrierGL)); + break; + } case OpCode::Id::DEPBAR: { LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); break; diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index da8e886df..994c05611 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -107,8 +107,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::TLD4S: { - UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); + const bool uses_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); + UNIMPLEMENTED_IF_MSG(uses_aoffi, "AOFFI is not implemented"); const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); const Node op_a = GetRegister(instr.gpr8); @@ -116,29 +116,40 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. std::vector<Node> coords; + Node dc_reg; if (depth_compare) { // Note: TLD4S coordinate encoding works just like TEXS's const Node op_y = GetRegister(instr.gpr8.Value() + 1); coords.push_back(op_a); coords.push_back(op_y); - coords.push_back(op_b); + dc_reg = uses_aoffi ? GetRegister(instr.gpr20.Value() + 1) : op_b; } else { coords.push_back(op_a); - coords.push_back(op_b); + if (uses_aoffi) { + const Node op_y = GetRegister(instr.gpr8.Value() + 1); + coords.push_back(op_y); + } else { + coords.push_back(op_b); + } + dc_reg = {}; } const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); const SamplerInfo info{TextureType::Texture2D, false, depth_compare}; - const auto& sampler = GetSampler(instr.sampler, info); + const Sampler& sampler = *GetSampler(instr.sampler, info); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, dc_reg, {}, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } - WriteTexsInstructionFloat(bb, instr, values, true); + if (instr.tld4s.fp16_flag) { + WriteTexsInstructionHalfFloat(bb, instr, values, true); + } else { + WriteTexsInstructionFloat(bb, instr, values, true); + } break; } case OpCode::Id::TXD_B: @@ -154,9 +165,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - const auto& sampler = is_bindless - ? GetBindlessSampler(base_reg, {{texture_type, false, false}}) - : GetSampler(instr.sampler, {{texture_type, false, false}}); + const Sampler* sampler = is_bindless + ? GetBindlessSampler(base_reg, {{texture_type, false, false}}) + : GetSampler(instr.sampler, {{texture_type, false, false}}); + Node4 values; + if (sampler == nullptr) { + for (u32 element = 0; element < values.size(); ++element) { + values[element] = Immediate(0); + } + WriteTexInstructionFloat(bb, instr, values); + break; + } if (is_bindless) { base_reg++; } @@ -170,9 +189,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { derivates.push_back(GetRegister(derivate_reg + derivate + 1)); } - Node4 values; for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{sampler, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -187,9 +205,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. - const auto& sampler = + const Sampler* sampler = is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); + if (sampler == nullptr) { + u32 indexer = 0; + for (u32 element = 0; element < 4; ++element) { + if (!instr.txq.IsComponentEnabled(element)) { + continue; + } + const Node value = Immediate(0); + SetTemporary(bb, indexer++, value); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } + u32 indexer = 0; switch (instr.txq.query_type) { case Tegra::Shader::TextureQueryType::Dimension: { @@ -197,7 +230,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -223,9 +256,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = + const Sampler* sampler = is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); + if (sampler == nullptr) { + u32 indexer = 0; + for (u32 element = 0; element < 2; ++element) { + if (!instr.tmml.IsComponentEnabled(element)) { + continue; + } + const Node value = Immediate(0); + SetTemporary(bb, indexer++, value); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } + std::vector<Node> coords; // TODO: Add coordinates for different samplers once other texture types are implemented. @@ -251,7 +299,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -307,7 +355,7 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample sampler->is_buffer != 0}; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, +const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, std::optional<SamplerInfo> sampler_info) { const auto offset = static_cast<u32>(sampler.index.Value()); const auto info = GetSamplerInfo(sampler_info, offset); @@ -319,21 +367,24 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, if (it != used_samplers.end()) { ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer); - return *it; + return &(*it); } // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, - info.is_buffer); + return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, + info.is_buffer); } -const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_sampler, buffer, offset] = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); ASSERT(base_sampler != nullptr); + if (base_sampler == nullptr) { + return nullptr; + } const auto info = GetSamplerInfo(sampler_info, offset, buffer); @@ -346,13 +397,13 @@ const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, if (it != used_samplers.end()) { ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); - return *it; + return &(*it); } // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, - info.is_shadow, info.is_buffer); + return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer); } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -395,14 +446,14 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const } void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { + const Node4& components, bool ignore_mask) { // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half // float instruction). Node4 values; u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) + if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) continue; values[dest_elem++] = components[component]; } @@ -438,8 +489,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, "This method is not supported."); const SamplerInfo info{texture_type, is_array, is_shadow, false}; - const auto& sampler = + const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); + Node4 values; + if (sampler == nullptr) { + for (u32 element = 0; element < values.size(); ++element) { + values[element] = Immediate(0); + } + return values; + } const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -478,10 +536,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, } } - Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element}; + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -594,8 +651,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); const SamplerInfo info{texture_type, is_array, depth_compare, false}; - const auto& sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) - : GetSampler(instr.sampler, info); + const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) + : GetSampler(instr.sampler, info); + Node4 values; + if (sampler == nullptr) { + for (u32 element = 0; element < values.size(); ++element) { + values[element] = Immediate(0); + } + return values; + } std::vector<Node> aoffi; if (is_aoffi) { @@ -610,10 +674,9 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) : Immediate(static_cast<u32>(instr.tld4.component)); - Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component, + MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -642,7 +705,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler); + const auto& sampler = *GetSampler(instr.sampler); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -655,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { } Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - const auto& sampler = GetSampler(instr.sampler); + const Sampler& sampler = *GetSampler(instr.sampler); const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 1a4d28ae9..abd40f582 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -189,6 +189,8 @@ enum class OperationCode { ThreadId, /// () -> uint ShuffleIndexed, /// (uint value, uint index) -> uint + MemoryBarrierGL, /// () -> void + Amount, }; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 580f84fcb..04ae5f822 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -313,11 +313,11 @@ private: std::optional<u32> buffer = std::nullopt); /// Accesses a texture sampler - const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, + const Sampler* GetSampler(const Tegra::Shader::Sampler& sampler, std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses a texture sampler for a bindless texture. - const Sampler& GetBindlessSampler(Tegra::Shader::Register reg, + const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses an image. @@ -338,7 +338,7 @@ private: void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components, bool ignore_mask = false); void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); + const Node4& components, bool ignore_mask = false); Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |