summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
m---------externals/sirit0
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp66
-rw-r--r--src/core/hle/kernel/address_arbiter.h19
-rw-r--r--src/core/hle/kernel/kernel.cpp6
-rw-r--r--src/core/hle/kernel/svc.cpp5
-rw-r--r--src/core/hle/kernel/svc_wrap.h10
-rw-r--r--src/video_core/engines/shader_bytecode.h18
-rw-r--r--src/video_core/rasterizer_accelerated.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp14
-rw-r--r--src/video_core/shader/decode/other.cpp6
-rw-r--r--src/video_core/shader/node.h2
14 files changed, 130 insertions, 36 deletions
diff --git a/externals/sirit b/externals/sirit
-Subproject e1a6729df7f11e33f6dc0939b18995a57c8bf3d
+Subproject 12f40a80324d7c154f19f25c448a5ce27d38cd1
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 8422d05e0..db189c8e3 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -17,10 +17,10 @@
#include "core/memory.h"
namespace Kernel {
-namespace {
+
// Wake up num_to_wake (or all) threads in a vector.
-void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) {
- auto& system = Core::System::GetInstance();
+void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
+ s32 num_to_wake) {
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
@@ -32,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3
for (std::size_t i = 0; i < last; i++) {
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+ RemoveThread(waiting_threads[i]);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
}
}
-} // Anonymous namespace
AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
AddressArbiter::~AddressArbiter() = default;
@@ -184,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->SetArbiterWaitAddress(address);
+ InsertThread(SharedFrom(current_thread));
current_thread->SetStatus(ThreadStatus::WaitArb);
current_thread->InvalidateWakeupCallback();
current_thread->WakeAfterDelay(timeout);
@@ -192,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
return RESULT_TIMEOUT;
}
-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
- VAddr address) const {
-
- // Retrieve all threads that are waiting for this address.
- std::vector<std::shared_ptr<Thread>> threads;
- const auto& scheduler = system.GlobalScheduler();
- const auto& thread_list = scheduler.GetThreadList();
+void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
+ ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
+ RemoveThread(thread);
+ thread->SetArbiterWaitAddress(0);
+}
- for (const auto& thread : thread_list) {
- if (thread->GetArbiterWaitAddress() == address) {
- threads.push_back(thread);
+void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
+ const VAddr arb_addr = thread->GetArbiterWaitAddress();
+ std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+ auto it = thread_list.begin();
+ while (it != thread_list.end()) {
+ const std::shared_ptr<Thread>& current_thread = *it;
+ if (current_thread->GetPriority() >= thread->GetPriority()) {
+ thread_list.insert(it, thread);
+ return;
}
+ ++it;
}
+ thread_list.push_back(std::move(thread));
+}
- // Sort them by priority, such that the highest priority ones come first.
- std::sort(threads.begin(), threads.end(),
- [](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) {
- return lhs->GetPriority() < rhs->GetPriority();
- });
+void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
+ const VAddr arb_addr = thread->GetArbiterWaitAddress();
+ std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+ auto it = thread_list.begin();
+ while (it != thread_list.end()) {
+ const std::shared_ptr<Thread>& current_thread = *it;
+ if (current_thread.get() == thread.get()) {
+ thread_list.erase(it);
+ return;
+ }
+ ++it;
+ }
+ UNREACHABLE();
+}
- return threads;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
+ std::vector<std::shared_ptr<Thread>> result;
+ std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
+ auto it = thread_list.begin();
+ while (it != thread_list.end()) {
+ std::shared_ptr<Thread> current_thread = *it;
+ result.push_back(std::move(current_thread));
+ ++it;
+ }
+ return result;
}
} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index 1e1f00e60..386983e54 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,7 +4,9 @@
#pragma once
+#include <list>
#include <memory>
+#include <unordered_map>
#include <vector>
#include "common/common_types.h"
@@ -48,6 +50,9 @@ public:
/// Waits on an address with a particular arbitration type.
ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
+ /// Removes a thread from the container and resets its address arbiter adress to 0
+ void HandleWakeupThread(std::shared_ptr<Thread> thread);
+
private:
/// Signals an address being waited on.
ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
@@ -71,8 +76,20 @@ private:
// Waits on the given address with a timeout in nanoseconds
ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
+ /// Wake up num_to_wake (or all) threads in a vector.
+ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
+
+ /// Insert a thread into the address arbiter container
+ void InsertThread(std::shared_ptr<Thread> thread);
+
+ /// Removes a thread from the address arbiter container
+ void RemoveThread(std::shared_ptr<Thread> thread);
+
// Gets the threads waiting on an address.
- std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+ std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+
+ /// List of threads waiting for a address arbiter
+ std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
Core::System& system;
};
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 0b149067a..1d0783bd3 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -78,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
}
}
- if (thread->GetArbiterWaitAddress() != 0) {
- ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
- thread->SetArbiterWaitAddress(0);
+ if (thread->GetStatus() == ThreadStatus::WaitArb) {
+ auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
+ address_arbiter.HandleWakeupThread(thread);
}
if (resume) {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 35ff26c39..dbcdb0b88 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1650,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
}
/// Signal process wide key
-static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
- s32 target) {
+static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
condition_variable_addr, target);
@@ -1726,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
system.PrepareReschedule(thread->GetProcessorID());
}
}
-
- return RESULT_SUCCESS;
}
// Wait for an address (via Address Arbiter)
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 9452e3b6f..29a2cfa9d 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) {
FuncReturn(system, retval);
}
-template <ResultCode func(Core::System&, u64, s32)>
-void SvcWrap(Core::System& system) {
- FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
-}
-
template <ResultCode func(Core::System&, u64, u32)>
void SvcWrap(Core::System& system) {
FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
@@ -327,6 +322,11 @@ void SvcWrap(Core::System& system) {
func(system, static_cast<s64>(Param(system, 0)));
}
+template <void func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+ func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
+}
+
template <void func(Core::System&, u64, u64)>
void SvcWrap(Core::System& system) {
func(system, Param(system, 0), Param(system, 1));
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7703a76a3..290d929df 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -384,6 +384,15 @@ enum class IsberdMode : u64 {
enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
+enum class MembarType : u64 {
+ CTA = 0,
+ GL = 1,
+ SYS = 2,
+ VC = 3,
+};
+
+enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
+
enum class HalfType : u64 {
H0_H1 = 0,
F32 = 1,
@@ -1546,6 +1555,11 @@ union Instruction {
} isberd;
union {
+ BitField<8, 2, MembarType> type;
+ BitField<0, 2, MembarUnknown> unknown;
+ } membar;
+
+ union {
BitField<48, 1, u64> signed_a;
BitField<38, 1, u64> is_byte_chunk_a;
BitField<36, 2, VideoType> type_a;
@@ -1669,6 +1683,7 @@ public:
IPA,
OUT_R, // Emit vertex/primitive
ISBERD,
+ MEMBAR,
VMAD,
VSETP,
FFMA_IMM, // Fused Multiply and Add
@@ -1930,7 +1945,7 @@ private:
INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
- INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
+ INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
@@ -1969,6 +1984,7 @@ private:
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+ INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
index fc6ecb899..d01db97da 100644
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -5,6 +5,7 @@
#include <mutex>
#include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>
#include "common/assert.h"
#include "common/common_types.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 88d78d2ad..f20967d85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -277,6 +277,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
continue;
}
+ // Currently this stages are not supported in the OpenGL backend.
+ // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
+ if (program == Maxwell::ShaderProgram::TesselationControl) {
+ continue;
+ } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+ continue;
+ }
+
Shader shader{shader_cache.GetStageProgram(program)};
// Stage indices are 0 - 5
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 370bdf052..270a9dc2b 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -281,11 +281,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
if (variant.shared_memory_size > 0) {
// TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool
// avoids out of bound stores. Find out why shared memory size is being invalid.
- source += fmt::format("shared uint smem[{}];", variant.shared_memory_size);
+ source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
}
if (variant.local_memory_size > 0) {
- source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
+ source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
Common::AlignUp(variant.local_memory_size, 4) / 4);
}
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 9700c2ebe..fa7049bbe 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1992,6 +1992,11 @@ private:
return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
}
+ Expression MemoryBarrierGL(Operation) {
+ code.AddLine("memoryBarrier();");
+ return {};
+ }
+
struct Func final {
Func() = delete;
~Func() = delete;
@@ -2173,6 +2178,8 @@ private:
&GLSLDecompiler::ThreadId,
&GLSLDecompiler::ShuffleIndexed,
+
+ &GLSLDecompiler::MemoryBarrierGL,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 8ad89b58a..6227bc70b 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1971,6 +1971,18 @@ private:
return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
}
+ Expression MemoryBarrierGL(Operation) {
+ const auto scope = spv::Scope::Device;
+ const auto semantics =
+ spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
+ spv::MemorySemanticsMask::WorkgroupMemory |
+ spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
+
+ OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
+ Constant(t_uint, static_cast<u32>(semantics)));
+ return {};
+ }
+
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
const Id id = OpVariable(type, storage);
Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
@@ -2374,6 +2386,8 @@ private:
&SPIRVDecompiler::ThreadId,
&SPIRVDecompiler::ShuffleIndexed,
+
+ &SPIRVDecompiler::MemoryBarrierGL,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 5c802886b..7321698b2 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -257,6 +257,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
break;
}
+ case OpCode::Id::MEMBAR: {
+ UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
+ UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
+ bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+ break;
+ }
case OpCode::Id::DEPBAR: {
LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
break;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 1a4d28ae9..abd40f582 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -189,6 +189,8 @@ enum class OperationCode {
ThreadId, /// () -> uint
ShuffleIndexed, /// (uint value, uint index) -> uint
+ MemoryBarrierGL, /// () -> void
+
Amount,
};