Diffstat (limited to 'src')
91 files changed, 2438 insertions, 706 deletions
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 50d2a1ed3..8619a3f03 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
     return {};
 }
 
-StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
+StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
+                               u32 num_channels, std::string&& name,
                                Stream::ReleaseCallback&& release_callback) {
     if (!sink) {
         sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
    }
 
     return std::make_shared<Stream>(
-        sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
+        core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
         sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
 }
diff --git a/src/audio_core/audio_out.h b/src/audio_core/audio_out.h
index df9607ac7..b07588287 100644
--- a/src/audio_core/audio_out.h
+++ b/src/audio_core/audio_out.h
@@ -13,6 +13,10 @@
 #include "audio_core/stream.h"
 #include "common/common_types.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace AudioCore {
 
 /**
@@ -21,8 +25,8 @@ namespace AudioCore {
 class AudioOut {
 public:
     /// Opens a new audio stream
-    StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
-                         Stream::ReleaseCallback&& release_callback);
+    StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
+                         std::string&& name, Stream::ReleaseCallback&& release_callback);
 
     /// Returns a vector of recently released buffers specified by tag for the specified stream
     std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 00c026511..9a0939883 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -8,6 +8,7 @@
 #include "audio_core/codec.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/memory.h"
 
@@ -71,14 +72,14 @@ private:
     EffectOutStatus out_status{};
     EffectInStatus info{};
 };
 
-AudioRenderer::AudioRenderer(AudioRendererParameter params,
+AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
                              Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
     : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
       effects(params.effect_count) {
 
     audio_out = std::make_unique<AudioCore::AudioOut>();
-    stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer",
-                                   [=]() { buffer_event->Signal(); });
+    stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
+                                   "AudioRenderer", [=]() { buffer_event->Signal(); });
     audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 7826881bf..201ec7a3c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -14,6 +14,10 @@
 #include "common/swap.h"
 #include "core/hle/kernel/object.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Kernel {
 class WritableEvent;
 }
@@ -208,7 +212,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
 
 class AudioRenderer {
 public:
-    AudioRenderer(AudioRendererParameter params,
+    AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
                   Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
     ~AudioRenderer();
diff --git a/src/audio_core/buffer.h b/src/audio_core/buffer.h
index a323b23ec..5ee09e9aa 100644
--- a/src/audio_core/buffer.h
+++ b/src/audio_core/buffer.h
@@ -21,7 +21,7 @@ public:
     Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
 
     /// Returns the raw audio data for the buffer
-    std::vector<s16>& Samples() {
+    std::vector<s16>& GetSamples() {
         return samples;
     }
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 8ab5649df..4b66a6786 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -32,12 +32,12 @@ u32 Stream::GetNumChannels() const {
     return {};
 }
 
-Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
-               SinkStream& sink_stream, std::string&& name_)
+Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
+               ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
     : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
-      sink_stream{sink_stream}, name{std::move(name_)} {
+      sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
 
-    release_event = Core::Timing::RegisterEvent(
+    release_event = core_timing.RegisterEvent(
         name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
 }
 
@@ -95,12 +95,11 @@ void Stream::PlayNextBuffer() {
     active_buffer = queued_buffers.front();
     queued_buffers.pop();
 
-    VolumeAdjustSamples(active_buffer->Samples());
+    VolumeAdjustSamples(active_buffer->GetSamples());
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    Core::Timing::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event,
-                                          {});
+    core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
 }
 
 void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index caa775544..05071243b 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -14,8 +14,9 @@
 #include "common/common_types.h"
 
 namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace AudioCore {
 
@@ -42,8 +43,8 @@ public:
     /// Callback function type, used to change guest state on a buffer being released
     using ReleaseCallback = std::function<void()>;
 
-    Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
-           SinkStream& sink_stream, std::string&& name_);
+    Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
+           ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
 
     /// Plays the audio stream
     void Play();
@@ -100,6 +101,7 @@ private:
     std::queue<BufferPtr> queued_buffers;  ///< Buffers queued to be played in the stream
     std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
     SinkStream& sink_stream;               ///< Output sink for the stream
+    Core::Timing::CoreTiming& core_timing; ///< Core timing instance.
     std::string name;                      ///< Name of the stream, must be unique
 };
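The pattern running through the audio changes above: Stream and AudioOut no longer reach into a global Core::Timing namespace; they receive the CoreTiming instance by reference and keep it for the object's lifetime. A minimal sketch of what the new Stream constructor does with that reference, assuming yuzu's core timing header is available (the class name StreamSketch and its simplified members are illustrative, not the real Stream):

// Sketch only: assumes yuzu's "core/core_timing.h"; the real Stream has more members.
#include <string>
#include <utility>

#include "core/core_timing.h"

class StreamSketch {
public:
    StreamSketch(Core::Timing::CoreTiming& core_timing, std::string name_)
        : core_timing{core_timing}, name{std::move(name_)} {
        // Register one event type per stream; the callback later runs on the emu thread.
        release_event = this->core_timing.RegisterEvent(
            name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
    }

    void PlayBuffer(s64 release_cycles) {
        // Audio runs outside the emu thread, hence the threadsafe variant.
        core_timing.ScheduleEventThreadsafe(release_cycles, release_event, {});
    }

private:
    void ReleaseActiveBuffer() { /* signal the guest that the buffer is free */ }

    Core::Timing::CoreTiming& core_timing; // non-owning; the System outlives the stream
    std::string name;
    Core::Timing::EventType* release_event = nullptr;
};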
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 12f6d0114..b369f199f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -40,9 +40,7 @@ public:
     const Impl& operator=(Impl const&) = delete;
 
     void PushEntry(Entry e) {
-        std::lock_guard<std::mutex> lock(message_mutex);
         message_queue.Push(std::move(e));
-        message_cv.notify_one();
     }
 
     void AddBackend(std::unique_ptr<Backend> backend) {
@@ -86,15 +84,13 @@ private:
             }
         };
         while (true) {
-            {
-                std::unique_lock<std::mutex> lock(message_mutex);
-                message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
-            }
-            if (!running) {
+            entry = message_queue.PopWait();
+            if (entry.final_entry) {
                 break;
             }
             write_logs(entry);
         }
+
         // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
         // where a system is repeatedly spamming logs even on close.
         const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,14 +102,13 @@ private:
     }
 
     ~Impl() {
-        running = false;
-        message_cv.notify_one();
+        Entry entry;
+        entry.final_entry = true;
+        message_queue.Push(entry);
         backend_thread.join();
     }
 
-    std::atomic_bool running{true};
-    std::mutex message_mutex, writing_mutex;
-    std::condition_variable message_cv;
+    std::mutex writing_mutex;
     std::thread backend_thread;
     std::vector<std::unique_ptr<Backend>> backends;
     Common::MPSCQueue<Log::Entry> message_queue;
@@ -232,6 +227,7 @@ void DebuggerBackend::Write(const Entry& entry) {
     CLS(Render) \
     SUB(Render, Software) \
     SUB(Render, OpenGL) \
+    SUB(Render, Vulkan) \
     CLS(Audio) \
     SUB(Audio, DSP) \
     SUB(Audio, Sink) \
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index 91bb0c309..a31ee6968 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -27,6 +27,7 @@ struct Entry {
     unsigned int line_num;
     std::string function;
     std::string message;
+    bool final_entry = false;
 
     Entry() = default;
     Entry(Entry&& o) = default;
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index d4ec31ec3..8ed6d5050 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -112,6 +112,7 @@ enum class Class : ClassType {
     Render,          ///< Emulator video output and hardware acceleration
     Render_Software, ///< Software renderer backend
     Render_OpenGL,   ///< OpenGL backend
+    Render_Vulkan,   ///< Vulkan backend
     Audio,           ///< Audio emulation
     Audio_DSP,       ///< The HLE implementation of the DSP
     Audio_Sink,      ///< Emulator audio output backend
diff --git a/src/common/swap.h b/src/common/swap.h
index 32af0b6ac..0e219747f 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -28,8 +28,8 @@
 #include <cstring>
 #include "common/common_types.h"
 
-// GCC 4.6+
-#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+// GCC
+#ifdef __GNUC__
 
 #if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
 #endif
 
 // LLVM/clang
-#elif __clang__
+#elif defined(__clang__)
 
 #if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index f553efdc9..821e8536a 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -8,6 +8,7 @@
 // single reader, single writer queue
 
 #include <atomic>
+#include <condition_variable>
 #include <cstddef>
 #include <mutex>
 #include <utility>
@@ -45,6 +46,7 @@ public:
         ElementPtr* new_ptr = new ElementPtr();
         write_ptr->next.store(new_ptr, std::memory_order_release);
         write_ptr = new_ptr;
+        cv.notify_one();
 
         ++size;
     }
@@ -74,6 +76,16 @@ public:
         return true;
     }
 
+    T PopWait() {
+        if (Empty()) {
+            std::unique_lock<std::mutex> lock(cv_mutex);
+            cv.wait(lock, [this]() { return !Empty(); });
+        }
+        T t;
+        Pop(t);
+        return t;
+    }
+
     // not thread-safe
     void Clear() {
         size.store(0);
@@ -101,6 +113,8 @@ private:
     ElementPtr* write_ptr;
     ElementPtr* read_ptr;
     std::atomic_size_t size{0};
+    std::mutex cv_mutex;
+    std::condition_variable cv;
 };
 
 // a simple thread-safe,
@@ -135,6 +149,10 @@ public:
         return spsc_queue.Pop(t);
     }
 
+    T PopWait() {
+        return spsc_queue.PopWait();
+    }
+
     // not thread-safe
     void Clear() {
         spsc_queue.Clear();
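Two pieces above work together: Entry gains a final_entry flag, and the queue gains a blocking PopWait(). Shutdown of the logging thread is now signalled in-band, by pushing one last entry with final_entry set, instead of flipping an atomic flag and poking a condition variable. A minimal sketch of the same consumer pattern using a mutex-guarded queue (yuzu's MPSCQueue keeps its lock-free fast path and uses the condition variable only for sleeping, which this sketch does not reproduce):

#include <condition_variable>
#include <mutex>
#include <queue>
#include <string>
#include <thread>

// Hypothetical stand-ins for Log::Entry and the message queue.
struct Entry {
    std::string message;
    bool final_entry = false; // in-band shutdown sentinel, as added to Log::Entry
};

class BlockingQueue {
public:
    void Push(Entry e) {
        {
            std::lock_guard<std::mutex> lock{mutex};
            queue.push(std::move(e));
        }
        cv.notify_one(); // single consumer thread
    }

    Entry PopWait() {
        std::unique_lock<std::mutex> lock{mutex};
        cv.wait(lock, [this] { return !queue.empty(); });
        Entry e = std::move(queue.front());
        queue.pop();
        return e;
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    std::queue<Entry> queue;
};

int main() {
    BlockingQueue queue;
    std::thread backend{[&queue] {
        while (true) {
            const Entry e = queue.PopWait();
            if (e.final_entry) {
                break; // shutdown arrives as an ordinary message
            }
            // write_logs(e) in the real backend
        }
    }};

    queue.Push({"initialized OK"});

    Entry last;
    last.final_entry = true; // what ~Impl() now does
    queue.Push(std::move(last));
    backend.join();
}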
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index f61bcd40d..988356c65 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -400,6 +400,10 @@ add_library(core STATIC
     hle/service/time/time.h
     hle/service/usb/usb.cpp
     hle/service/usb/usb.h
+    hle/service/vi/display/vi_display.cpp
+    hle/service/vi/display/vi_display.h
+    hle/service/vi/layer/vi_layer.cpp
+    hle/service/vi/layer/vi_layer.h
     hle/service/vi/vi.cpp
     hle/service/vi/vi.h
     hle/service/vi/vi_m.cpp
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index f28951f8a..9b7ca4030 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -112,14 +112,14 @@ public:
         // Always execute at least one tick.
         amortized_ticks = std::max<u64>(amortized_ticks, 1);
 
-        Timing::AddTicks(amortized_ticks);
+        parent.core_timing.AddTicks(amortized_ticks);
         num_interpreted_instructions = 0;
     }
 
     u64 GetTicksRemaining() override {
-        return std::max(Timing::GetDowncount(), 0);
+        return std::max(parent.core_timing.GetDowncount(), 0);
     }
 
     u64 GetCNTPCT() override {
-        return Timing::GetTicks();
+        return parent.core_timing.GetTicks();
     }
 
     ARM_Dynarmic& parent;
@@ -172,8 +172,10 @@ void ARM_Dynarmic::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
+ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+                           std::size_t core_index)
+    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
+      core_index{core_index}, core_timing{core_timing},
       exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
     ThreadContext ctx{};
     inner_unicorn.SaveContext(ctx);
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 512bf8ce9..6cc458296 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -16,6 +16,10 @@ namespace Memory {
 struct PageTable;
 }
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Dynarmic_Callbacks;
@@ -23,7 +27,8 @@ class DynarmicExclusiveMonitor;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+    ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+                 std::size_t core_index);
    ~ARM_Dynarmic();
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -62,6 +67,7 @@ private:
 
     ARM_Unicorn inner_unicorn;
     std::size_t core_index;
+    Timing::CoreTiming& core_timing;
     DynarmicExclusiveMonitor& exclusive_monitor;
 
     Memory::PageTable* current_page_table = nullptr;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c36c15c02..a542a098b 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -72,7 +72,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return {};
 }
 
-ARM_Unicorn::ARM_Unicorn() {
+ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
     CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
 
     auto fpv = 3 << 20;
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000, 0));
     } else {
-        ExecuteInstructions(std::max(Timing::GetDowncount(), 0));
+        ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
     }
 }
 
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
 void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
     MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
-    Timing::AddTicks(num_instructions);
+    core_timing.AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
         if (last_bkpt_hit) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 75761950b..dbd6955ea 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -9,12 +9,17 @@
 #include "core/arm/arm_interface.h"
 #include "core/gdbstub/gdbstub.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Unicorn final : public ARM_Interface {
 public:
-    ARM_Unicorn();
+    explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
     ~ARM_Unicorn();
+
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
     void UnmapMemory(VAddr address, std::size_t size) override;
@@ -43,6 +48,7 @@ public:
 
 private:
     uc_engine* uc{};
+    Timing::CoreTiming& core_timing;
     GDBStub::BreakpointAddress last_bkpt{};
     bool last_bkpt_hit;
 };
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 4d9d21ee4..ab7181a05 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -94,8 +94,8 @@ struct System::Impl {
     ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
         LOG_DEBUG(HW_Memory, "initialized OK");
 
-        Timing::Init();
-        kernel.Initialize();
+        core_timing.Initialize();
+        kernel.Initialize(core_timing);
 
         const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
             std::chrono::system_clock::now().time_since_epoch());
@@ -120,7 +120,7 @@ struct System::Impl {
         telemetry_session = std::make_unique<Core::TelemetrySession>();
         service_manager = std::make_shared<Service::SM::ServiceManager>();
 
-        Service::Init(service_manager, *virtual_filesystem);
+        Service::Init(service_manager, system, *virtual_filesystem);
         GDBStub::Init();
 
         renderer = VideoCore::CreateRenderer(emu_window, system);
@@ -128,7 +128,7 @@ struct System::Impl {
             return ResultStatus::ErrorVideoCore;
         }
 
-        gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
+        gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
 
         cpu_core_manager.Initialize(system);
         is_powered_on = true;
@@ -205,7 +205,7 @@ struct System::Impl {
 
         // Shutdown kernel and core timing
         kernel.Shutdown();
-        Timing::Shutdown();
+        core_timing.Shutdown();
 
         // Close app loader
         app_loader.reset();
@@ -232,9 +232,10 @@ struct System::Impl {
     }
 
     PerfStatsResults GetAndResetPerfStats() {
-        return perf_stats.GetAndResetStats(Timing::GetGlobalTimeUs());
+        return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
     }
 
+    Timing::CoreTiming core_timing;
     Kernel::KernelCore kernel;
     /// RealVfsFilesystem instance
     FileSys::VirtualFilesystem virtual_filesystem;
@@ -396,6 +397,14 @@ const Kernel::KernelCore& System::Kernel() const {
     return impl->kernel;
 }
 
+Timing::CoreTiming& System::CoreTiming() {
+    return impl->core_timing;
+}
+
+const Timing::CoreTiming& System::CoreTiming() const {
+    return impl->core_timing;
+}
+
 Core::PerfStats& System::GetPerfStats() {
     return impl->perf_stats;
 }
diff --git a/src/core/core.h b/src/core/core.h
index 511a5ad3a..d720013f7 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -47,6 +47,10 @@ namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Interface;
@@ -205,6 +209,12 @@ public:
     /// Provides a constant pointer to the current process.
     const Kernel::Process* CurrentProcess() const;
 
+    /// Provides a reference to the core timing instance.
+    Timing::CoreTiming& CoreTiming();
+
+    /// Provides a constant reference to the core timing instance.
+    const Timing::CoreTiming& CoreTiming() const;
+
     /// Provides a reference to the kernel instance.
     Kernel::KernelCore& Kernel();
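With core.cpp/core.h in place, System::Impl owns the CoreTiming value and every consumer reaches it through the new accessors. A stand-alone sketch of that ownership shape, with hypothetical names (the real accessor is spelled CoreTiming(), and the diff declares core_timing before the kernel member so it is constructed first and destroyed last):

#include <memory>

// Hypothetical sketch; the real System::Impl holds many more members.
class CoreTiming {
public:
    void Initialize() { /* reset slices, register _lost_event, ... */ }
    void Shutdown() {}
};

class System {
public:
    System() : impl{std::make_unique<Impl>()} {}

    CoreTiming& GetCoreTiming() { return impl->core_timing; }
    const CoreTiming& GetCoreTiming() const { return impl->core_timing; }

private:
    struct Impl {
        CoreTiming core_timing; // listed before the kernel member in the diff,
                                // so it outlives everything that schedules events
    };
    std::unique_ptr<Impl> impl;
};

int main() {
    System system;
    system.GetCoreTiming().Initialize();
    system.GetCoreTiming().Shutdown();
}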
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 452366250..54aa21a3a 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -49,17 +49,18 @@ bool CpuBarrier::Rendezvous() {
     return false;
 }
 
-Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index)
-    : cpu_barrier{cpu_barrier}, core_index{core_index} {
+Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+         CpuBarrier& cpu_barrier, std::size_t core_index)
+    : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
-        arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index);
+        arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
 #else
         arm_interface = std::make_unique<ARM_Unicorn>();
         LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
     } else {
-        arm_interface = std::make_unique<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
     }
 
     scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
@@ -93,14 +94,14 @@ void Cpu::RunLoop(bool tight_loop) {
         if (IsMainCore()) {
             // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
-            Timing::Idle();
-            Timing::Advance();
+            core_timing.Idle();
+            core_timing.Advance();
         }
 
         PrepareReschedule();
     } else {
         if (IsMainCore()) {
-            Timing::Advance();
+            core_timing.Advance();
         }
 
         if (tight_loop) {
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 1d2bdc6cd..e2204c6b0 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
 class Scheduler;
 }
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Interface;
@@ -41,7 +45,8 @@ private:
 
 class Cpu {
 public:
-    Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index);
+    Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+        CpuBarrier& cpu_barrier, std::size_t core_index);
     ~Cpu();
 
     void RunLoop(bool tight_loop = true);
@@ -82,6 +87,7 @@ private:
     std::unique_ptr<ARM_Interface> arm_interface;
     CpuBarrier& cpu_barrier;
     std::unique_ptr<Kernel::Scheduler> scheduler;
+    Timing::CoreTiming& core_timing;
 
     std::atomic<bool> reschedule_pending = false;
     std::size_t core_index;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 4ea00c277..a0dd5db24 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -8,94 +8,42 @@
 #include <mutex>
 #include <string>
 #include <tuple>
-#include <unordered_map>
-#include <vector>
+
 #include "common/assert.h"
 #include "common/thread.h"
-#include "common/threadsafe_queue.h"
 #include "core/core_timing_util.h"
 
 namespace Core::Timing {
 
-static s64 global_timer;
-static int slice_length;
-static int downcount;
-
-struct EventType {
-    TimedCallback callback;
-    const std::string* name;
-};
+constexpr int MAX_SLICE_LENGTH = 20000;
 
-struct Event {
+struct CoreTiming::Event {
     s64 time;
     u64 fifo_order;
     u64 userdata;
     const EventType* type;
-};
-
-// Sort by time, unless the times are the same, in which case sort by the order added to the queue
-static bool operator>(const Event& left, const Event& right) {
-    return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
-}
-
-static bool operator<(const Event& left, const Event& right) {
-    return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
-}
-
-// unordered_map stores each element separately as a linked list node so pointers to elements
-// remain stable regardless of rehashes/resizing.
-static std::unordered_map<std::string, EventType> event_types;
-// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
-// We don't use std::priority_queue because we need to be able to serialize, unserialize and
-// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated
-// by the standard adaptor class.
-static std::vector<Event> event_queue;
-static u64 event_fifo_id;
-// the queue for storing the events from other threads threadsafe until they will be added
-// to the event_queue by the emu thread
-static Common::MPSCQueue<Event> ts_queue;
-
-// the queue for unscheduling the events from other threads threadsafe
-static Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
-
-constexpr int MAX_SLICE_LENGTH = 20000;
-
-static s64 idled_cycles;
-
-// Are we in a function that has been called from Advance()
-// If events are sheduled from a function that gets called from Advance(),
-// don't change slice_length and downcount.
-static bool is_global_timer_sane;
-
-static EventType* ev_lost = nullptr;
-
-EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
-    // check for existing type with same name.
-    // we want event type names to remain unique so that we can use them for serialization.
-    ASSERT_MSG(event_types.find(name) == event_types.end(),
-               "CoreTiming Event \"{}\" is already registered. Events should only be registered "
-               "during Init to avoid breaking save states.",
-               name.c_str());
+    // Sort by time, unless the times are the same, in which case sort by
+    // the order added to the queue
+    friend bool operator>(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
+    }
 
-    auto info = event_types.emplace(name, EventType{callback, nullptr});
-    EventType* event_type = &info.first->second;
-    event_type->name = &info.first->first;
-    return event_type;
-}
+    friend bool operator<(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
+    }
+};
 
-void UnregisterAllEvents() {
-    ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
-    event_types.clear();
-}
+CoreTiming::CoreTiming() = default;
+CoreTiming::~CoreTiming() = default;
 
-void Init() {
+void CoreTiming::Initialize() {
     downcount = MAX_SLICE_LENGTH;
     slice_length = MAX_SLICE_LENGTH;
     global_timer = 0;
     idled_cycles = 0;
 
-    // The time between CoreTiming being intialized and the first call to Advance() is considered
+    // The time between CoreTiming being initialized and the first call to Advance() is considered
     // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
     // executing the first cycle of each slice to prepare the slice length and downcount for
     // that slice.
@@ -107,50 +55,51 @@ void Init() {
     ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
 }
 
-void Shutdown() {
+void CoreTiming::Shutdown() {
     MoveEvents();
     ClearPendingEvents();
     UnregisterAllEvents();
 }
 
-// This should only be called from the CPU thread. If you are calling
-// it from any other thread, you are doing something evil
-u64 GetTicks() {
-    u64 ticks = static_cast<u64>(global_timer);
-    if (!is_global_timer_sane) {
-        ticks += slice_length - downcount;
-    }
-    return ticks;
-}
-
-void AddTicks(u64 ticks) {
-    downcount -= static_cast<int>(ticks);
-}
+EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
+    // check for existing type with same name.
+    // we want event type names to remain unique so that we can use them for serialization.
+    ASSERT_MSG(event_types.find(name) == event_types.end(),
+               "CoreTiming Event \"{}\" is already registered. Events should only be registered "
+               "during Init to avoid breaking save states.",
+               name.c_str());
 
-u64 GetIdleTicks() {
-    return static_cast<u64>(idled_cycles);
+    auto info = event_types.emplace(name, EventType{callback, nullptr});
+    EventType* event_type = &info.first->second;
+    event_type->name = &info.first->first;
+    return event_type;
 }
 
-void ClearPendingEvents() {
-    event_queue.clear();
+void CoreTiming::UnregisterAllEvents() {
+    ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
+    event_types.clear();
 }
 
-void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
     ASSERT(event_type != nullptr);
-    s64 timeout = GetTicks() + cycles_into_future;
+    const s64 timeout = GetTicks() + cycles_into_future;
+
     // If this event needs to be scheduled before the next advance(), force one early
-    if (!is_global_timer_sane)
+    if (!is_global_timer_sane) {
         ForceExceptionCheck(cycles_into_future);
+    }
+
     event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
     std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
 }
 
-void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
+                                         u64 userdata) {
     ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
 }
 
-void UnscheduleEvent(const EventType* event_type, u64 userdata) {
-    auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type == event_type && e.userdata == userdata;
     });
 
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
     }
 }
 
-void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
+void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
     unschedule_queue.Push(std::make_pair(event_type, userdata));
 }
 
-void RemoveEvent(const EventType* event_type) {
-    auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
-                              [&](const Event& e) { return e.type == event_type; });
+u64 CoreTiming::GetTicks() const {
+    u64 ticks = static_cast<u64>(global_timer);
+    if (!is_global_timer_sane) {
+        ticks += slice_length - downcount;
+    }
+    return ticks;
+}
+
+u64 CoreTiming::GetIdleTicks() const {
+    return static_cast<u64>(idled_cycles);
+}
+
+void CoreTiming::AddTicks(u64 ticks) {
+    downcount -= static_cast<int>(ticks);
+}
+
+void CoreTiming::ClearPendingEvents() {
+    event_queue.clear();
+}
+
+void CoreTiming::RemoveEvent(const EventType* event_type) {
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
+                                    [&](const Event& e) { return e.type == event_type; });
 
     // Removing random items breaks the invariant so we have to re-establish it.
     if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
     }
 }
 
-void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
+void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
     MoveEvents();
     RemoveEvent(event_type);
 }
 
-void ForceExceptionCheck(s64 cycles) {
+void CoreTiming::ForceExceptionCheck(s64 cycles) {
     cycles = std::max<s64>(0, cycles);
-    if (downcount > cycles) {
-        // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
-        // here. Account for cycles already executed by adjusting the g.slice_length
-        slice_length -= downcount - static_cast<int>(cycles);
-        downcount = static_cast<int>(cycles);
+    if (downcount <= cycles) {
+        return;
     }
+
+    // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
+    // here. Account for cycles already executed by adjusting the g.slice_length
+    slice_length -= downcount - static_cast<int>(cycles);
+    downcount = static_cast<int>(cycles);
 }
 
-void MoveEvents() {
+void CoreTiming::MoveEvents() {
     for (Event ev; ts_queue.Pop(ev);) {
         ev.fifo_order = event_fifo_id++;
         event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
     }
 }
 
-void Advance() {
+void CoreTiming::Advance() {
     MoveEvents();
     for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
         UnscheduleEvent(ev.first, ev.second);
     }
 
-    int cycles_executed = slice_length - downcount;
+    const int cycles_executed = slice_length - downcount;
     global_timer += cycles_executed;
     slice_length = MAX_SLICE_LENGTH;
 
@@ -229,16 +200,16 @@ void Advance() {
     downcount = slice_length;
 }
 
-void Idle() {
+void CoreTiming::Idle() {
     idled_cycles += downcount;
     downcount = 0;
 }
 
-std::chrono::microseconds GetGlobalTimeUs() {
+std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
     return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
 }
 
-int GetDowncount() {
+int CoreTiming::GetDowncount() const {
     return downcount;
 }
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 093989d4c..59163bae1 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -4,92 +4,153 @@
 #pragma once
 
-/**
- * This is a system to schedule events into the emulated machine's future. Time is measured
- * in main CPU clock cycles.
- *
- * To schedule an event, you first have to register its type. This is where you pass in the
- * callback. You then schedule events using the type id you get back.
- *
- * The int cyclesLate that the callbacks get is how many cycles late it was.
- * So to schedule a new event on a regular basis:
- * inside callback:
- *   ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
- */
-
 #include <chrono>
 #include <functional>
 #include <string>
+#include <unordered_map>
+#include <vector>
 #include "common/common_types.h"
+#include "common/threadsafe_queue.h"
 
 namespace Core::Timing {
 
-struct EventType;
-
+/// A callback that may be scheduled for a particular core timing event.
 using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
 
-/**
- * CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
- * required to end slice -1 and start slice 0 before the first cycle of code is executed.
- */
-void Init();
-void Shutdown();
-
-/**
- * This should only be called from the emu thread, if you are calling it any other thread, you are
- * doing something evil
- */
-u64 GetTicks();
-u64 GetIdleTicks();
-void AddTicks(u64 ticks);
-
-/**
- * Returns the event_type identifier. if name is not unique, it will assert.
- */
-EventType* RegisterEvent(const std::string& name, TimedCallback callback);
-void UnregisterAllEvents();
-
-/**
- * After the first Advance, the slice lengths and the downcount will be reduced whenever an event
- * is scheduled earlier than the current values.
- * Scheduling from a callback will not update the downcount until the Advance() completes.
- */
-void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
+/// Contains the characteristics of a particular event.
+struct EventType {
+    /// The event's callback function.
+    TimedCallback callback;
+    /// A pointer to the name of the event.
+    const std::string* name;
+};
 
 /**
- * This is to be called when outside of hle threads, such as the graphics thread, wants to
- * schedule things to be executed on the main thread.
- * Not that this doesn't change slice_length and thus events scheduled by this might be called
- * with a delay of up to MAX_SLICE_LENGTH
- */
-void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);
-
-void UnscheduleEvent(const EventType* event_type, u64 userdata);
-void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
-
-/// We only permit one event of each type in the queue at a time.
-void RemoveEvent(const EventType* event_type);
-void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
-
-/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
- * the previous timing slice and begins the next one, you must Advance from the previous
- * slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
- * Advance() is required to initialize the slice length before the first cycle of emulated
- * instructions is executed.
+ * This is a system to schedule events into the emulated machine's future. Time is measured
+ * in main CPU clock cycles.
+ *
+ * To schedule an event, you first have to register its type. This is where you pass in the
+ * callback. You then schedule events using the type id you get back.
+ *
+ * The int cyclesLate that the callbacks get is how many cycles late it was.
+ * So to schedule a new event on a regular basis:
+ * inside callback:
+ *   ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
 */
-void Advance();
-void MoveEvents();
-
-/// Pretend that the main CPU has executed enough cycles to reach the next event.
-void Idle();
-
-/// Clear all pending events. This should ONLY be done on exit.
-void ClearPendingEvents();
-
-void ForceExceptionCheck(s64 cycles);
-
-std::chrono::microseconds GetGlobalTimeUs();
-
-int GetDowncount();
+class CoreTiming {
+public:
+    CoreTiming();
+    ~CoreTiming();
+
+    CoreTiming(const CoreTiming&) = delete;
+    CoreTiming(CoreTiming&&) = delete;
+
+    CoreTiming& operator=(const CoreTiming&) = delete;
+    CoreTiming& operator=(CoreTiming&&) = delete;
+
+    /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
+    /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
+    void Initialize();
+
+    /// Tears down all timing related functionality.
+    void Shutdown();
+
+    /// Registers a core timing event with the given name and callback.
+    ///
+    /// @param name     The name of the core timing event to register.
+    /// @param callback The callback to execute for the event.
+    ///
+    /// @returns An EventType instance representing the registered event.
+    ///
+    /// @pre The name of the event being registered must be unique among all
+    ///      registered events.
+    ///
+    EventType* RegisterEvent(const std::string& name, TimedCallback callback);
+
+    /// Unregisters all registered events thus far.
+    void UnregisterAllEvents();
+
+    /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
+    /// event is scheduled earlier than the current values.
+    ///
+    /// Scheduling from a callback will not update the downcount until the Advance() completes.
+    void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
+
+    /// This is to be called when outside of hle threads, such as the graphics thread, wants to
+    /// schedule things to be executed on the main thread.
+    ///
+    /// @note This doesn't change slice_length and thus events scheduled by this might be
+    ///       called with a delay of up to MAX_SLICE_LENGTH
+    void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
+                                 u64 userdata = 0);
+
+    void UnscheduleEvent(const EventType* event_type, u64 userdata);
+    void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
+
+    /// We only permit one event of each type in the queue at a time.
+    void RemoveEvent(const EventType* event_type);
+    void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
+
+    void ForceExceptionCheck(s64 cycles);
+
+    /// This should only be called from the emu thread, if you are calling it any other thread,
+    /// you are doing something evil
+    u64 GetTicks() const;
+
+    u64 GetIdleTicks() const;
+
+    void AddTicks(u64 ticks);
+
+    /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
+    /// the previous timing slice and begins the next one, you must Advance from the previous
+    /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
+    /// Advance() is required to initialize the slice length before the first cycle of emulated
+    /// instructions is executed.
+    void Advance();
+
+    /// Pretend that the main CPU has executed enough cycles to reach the next event.
+    void Idle();
+
+    std::chrono::microseconds GetGlobalTimeUs() const;
+
+    int GetDowncount() const;
+
+private:
+    struct Event;
+
+    /// Clear all pending events. This should ONLY be done on exit.
+    void ClearPendingEvents();
+    void MoveEvents();
+
+    s64 global_timer = 0;
+    s64 idled_cycles = 0;
+    int slice_length = 0;
+    int downcount = 0;
+
+    // Are we in a function that has been called from Advance()
+    // If events are scheduled from a function that gets called from Advance(),
+    // don't change slice_length and downcount.
+    bool is_global_timer_sane = false;
+
+    // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
+    // We don't use std::priority_queue because we need to be able to serialize, unserialize and
+    // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
+    // accomodated by the standard adaptor class.
+    std::vector<Event> event_queue;
+    u64 event_fifo_id = 0;
+
+    // Stores each element separately as a linked list node so pointers to elements
+    // remain stable regardless of rehashes/resizing.
+    std::unordered_map<std::string, EventType> event_types;
+
+    // The queue for storing the events from other threads threadsafe until they will be added
+    // to the event_queue by the emu thread
+    Common::MPSCQueue<Event> ts_queue;
+
+    // The queue for unscheduling the events from other threads threadsafe
+    Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
+
+    EventType* ev_lost = nullptr;
+};
 
 } // namespace Core::Timing
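The header above documents the event queue's design: a std::vector kept as a min-heap via std::make_heap/push_heap/pop_heap, ordered by (time, fifo_order) so that same-tick events fire in FIFO order, and chosen over std::priority_queue so arbitrary events can still be erased. A compilable sketch of that structure, with illustrative names rather than yuzu's exact internals:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <tuple>
#include <vector>

struct Event {
    int64_t time;
    uint64_t fifo_order;
    std::function<void(int64_t cycles_late)> callback;

    friend bool operator>(const Event& l, const Event& r) {
        return std::tie(l.time, l.fifo_order) > std::tie(r.time, r.fifo_order);
    }
};

class EventQueue {
public:
    void Schedule(int64_t time, std::function<void(int64_t)> cb) {
        queue.push_back(Event{time, fifo_id++, std::move(cb)});
        // std::greater<> turns the vector into a min-heap: earliest event at front().
        std::push_heap(queue.begin(), queue.end(), std::greater<>());
    }

    // Fire everything due at or before global_timer, like CoreTiming::Advance().
    void AdvanceTo(int64_t global_timer) {
        while (!queue.empty() && queue.front().time <= global_timer) {
            std::pop_heap(queue.begin(), queue.end(), std::greater<>());
            Event ev = std::move(queue.back());
            queue.pop_back();
            ev.callback(global_timer - ev.time); // the "cycles late" argument
        }
    }

private:
    std::vector<Event> queue; // a plain vector, so arbitrary erase + make_heap stays possible
    uint64_t fifo_id = 0;
};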
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 769a6fefa..2ddb3610d 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,7 +27,8 @@ void CpuCoreManager::Initialize(System& system) {
     exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
 
     for (std::size_t index = 0; index < cores.size(); ++index) {
-        cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index);
+        cores[index] =
+            std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
     }
 
     // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index ca12fb4ab..dfac9a4b3 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
 }
 
 void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
-    std::ifstream file(filename);
+    std::ifstream file;
+    OpenFStream(file, filename, std::ios_base::in);
     if (!file.is_open())
         return;
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 57157beb4..a250d088d 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -17,8 +17,7 @@
 #include "core/hle/result.h"
 #include "core/memory.h"
 
-namespace Kernel {
-namespace AddressArbiter {
+namespace Kernel::AddressArbiter {
 
 // Performs actual address waiting logic.
 static ResultCode WaitForAddress(VAddr address, s64 timeout) {
@@ -176,5 +175,4 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
     return WaitForAddress(address, timeout);
 }
 
-} // namespace AddressArbiter
-} // namespace Kernel
+} // namespace Kernel::AddressArbiter
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index e3657b8e9..b58f21bec 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -8,9 +8,8 @@
 
 union ResultCode;
 
-namespace Kernel {
+namespace Kernel::AddressArbiter {
 
-namespace AddressArbiter {
 enum class ArbitrationType {
     WaitIfLessThan = 0,
     DecrementAndWaitIfLessThan = 1,
@@ -29,6 +28,5 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
 ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
 ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
 
-} // namespace AddressArbiter
-} // namespace Kernel
+} // namespace Kernel::AddressArbiter
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 3721ae8fe..dd749eed4 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -86,11 +86,11 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
 }
 
 struct KernelCore::Impl {
-    void Initialize(KernelCore& kernel) {
+    void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
         Shutdown();
 
         InitializeSystemResourceLimit(kernel);
-        InitializeThreads();
+        InitializeThreads(core_timing);
     }
 
     void Shutdown() {
@@ -122,9 +122,9 @@ struct KernelCore::Impl {
         ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
     }
 
-    void InitializeThreads() {
+    void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
         thread_wakeup_event_type =
-            Core::Timing::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
+            core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
     }
 
     std::atomic<u32> next_object_id{0};
@@ -152,8 +152,8 @@ KernelCore::~KernelCore() {
     Shutdown();
 }
 
-void KernelCore::Initialize() {
-    impl->Initialize(*this);
+void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
+    impl->Initialize(*this, core_timing);
 }
 
 void KernelCore::Shutdown() {
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 7406f107e..154bced42 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -12,8 +12,9 @@ template <typename T>
 class ResultVal;
 
 namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace Kernel {
 
@@ -39,7 +40,11 @@ public:
     KernelCore& operator=(KernelCore&&) = delete;
 
     /// Resets the kernel to a clean slate for use.
-    void Initialize();
+    ///
+    /// @param core_timing CoreTiming instance used to create any necessary
+    ///                    kernel-specific callback events.
+    ///
+    void Initialize(Core::Timing::CoreTiming& core_timing);
 
     /// Clears all resources in use by the kernel instance.
     void Shutdown();
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 9e2517e1b..44f30d070 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -111,7 +111,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = Core::Timing::GetTicks();
+    const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
     const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
 
     if (thread != nullptr) {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 5f040f79f..c5d399bab 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -918,6 +918,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         }
 
         const auto& system = Core::System::GetInstance();
+        const auto& core_timing = system.CoreTiming();
         const auto& scheduler = system.CurrentScheduler();
         const auto* const current_thread = scheduler.GetCurrentThread();
         const bool same_thread = current_thread == thread;
@@ -927,9 +928,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
             const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
 
-            out_ticks = thread_ticks + (Core::Timing::GetTicks() - prev_ctx_ticks);
+            out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
         } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
-            out_ticks = Core::Timing::GetTicks() - prev_ctx_ticks;
+            out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
         }
 
         *result = out_ticks;
@@ -1546,10 +1547,11 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
 static u64 GetSystemTick() {
     LOG_TRACE(Kernel_SVC, "called");
 
-    const u64 result{Core::Timing::GetTicks()};
+    auto& core_timing = Core::System::GetInstance().CoreTiming();
+    const u64 result{core_timing.GetTicks()};
 
     // Advance time to defeat dumb games that busy-wait for the frame to end.
-    Core::Timing::AddTicks(400);
+    core_timing.AddTicks(400);
 
     return result;
 }
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 7881c2b90..6661e2130 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -43,7 +43,8 @@ Thread::~Thread() = default;
 
 void Thread::Stop() {
     // Cancel any outstanding wakeup events for this thread
-    Core::Timing::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
+                                                             callback_handle);
     kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
     callback_handle = 0;
 
@@ -85,13 +86,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
 
     // This function might be called from any thread so we have to be cautious and use the
     // thread-safe version of ScheduleEvent.
-    Core::Timing::ScheduleEventThreadsafe(Core::Timing::nsToCycles(nanoseconds),
-                                          kernel.ThreadWakeupCallbackEventType(), callback_handle);
+    Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
+        Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
+        callback_handle);
 }
 
 void Thread::CancelWakeupTimer() {
-    Core::Timing::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(),
-                                            callback_handle);
+    Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
+        kernel.ThreadWakeupCallbackEventType(), callback_handle);
 }
 
 static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -190,6 +192,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
         return ResultCode(-1);
     }
 
+    auto& system = Core::System::GetInstance();
     SharedPtr<Thread> thread(new Thread(kernel));
 
     thread->thread_id = kernel.CreateNewThreadID();
@@ -198,7 +201,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->stack_top = stack_top;
     thread->tpidr_el0 = 0;
     thread->nominal_priority = thread->current_priority = priority;
-    thread->last_running_ticks = Core::Timing::GetTicks();
+    thread->last_running_ticks = system.CoreTiming().GetTicks();
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask = 1ULL << processor_id;
@@ -209,7 +212,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->name = std::move(name);
     thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
     thread->owner_process = &owner_process;
-    thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id);
+    thread->scheduler = &system.Scheduler(processor_id);
     thread->scheduler->AddThread(thread, priority);
     thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
 
@@ -258,7 +261,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
     }
 
     if (status == ThreadStatus::Running) {
-        last_running_ticks = Core::Timing::GetTicks();
+        last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
     }
 
     status = new_status;
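Thread::WakeAfterDelay's comment explains why it uses the threadsafe variant: it can be called from any thread, while only the emu thread may touch the min-heap. ScheduleEventThreadsafe() therefore stages requests in an MPSC queue that Advance() later drains via MoveEvents(). A simplified sketch of that two-queue handoff, using a locked queue in place of yuzu's lock-free MPSCQueue:

#include <cstdint>
#include <mutex>
#include <queue>
#include <vector>

// Hypothetical request type; the real staging queue stores full Event structs.
struct Request {
    int64_t time;
    uint64_t userdata;
};

class StagingQueue {
public:
    // Safe to call from any thread (graphics, audio, ...).
    void Push(Request r) {
        std::lock_guard<std::mutex> lock{mutex};
        queue.push(r);
    }

    bool Pop(Request& out) {
        std::lock_guard<std::mutex> lock{mutex};
        if (queue.empty()) {
            return false;
        }
        out = queue.front();
        queue.pop();
        return true;
    }

private:
    std::mutex mutex;
    std::queue<Request> queue;
};

// Emu-thread side, analogous to CoreTiming::MoveEvents(): drain the staging
// queue into the heap-ordered vector before dispatching events.
void MoveEvents(StagingQueue& staging, std::vector<Request>& heap) {
    for (Request r; staging.Pop(r);) {
        heap.push_back(r); // followed by std::push_heap in the real code
    }
}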
Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, - "IAudioRenderer:SystemEvent"); - renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable); + auto& system = Core::System::GetInstance(); + system_event = Kernel::WritableEvent::CreateEventPair( + system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent"); + renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, + system_event.writable); } private: diff --git a/src/core/hle/service/hid/controllers/controller_base.h b/src/core/hle/service/hid/controllers/controller_base.h index f0e092b1b..5e5097a03 100644 --- a/src/core/hle/service/hid/controllers/controller_base.h +++ b/src/core/hle/service/hid/controllers/controller_base.h @@ -7,6 +7,10 @@ #include "common/common_types.h" #include "common/swap.h" +namespace Core::Timing { +class CoreTiming; +} + namespace Service::HID { class ControllerBase { public: @@ -20,7 +24,8 @@ public: virtual void OnRelease() = 0; // When the controller is requesting an update for the shared memory - virtual void OnUpdate(u8* data, std::size_t size) = 0; + virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) = 0; // Called when input devices should be loaded virtual void OnLoadInputDevices() = 0; diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp index b264c9503..c5c2e032a 100644 --- a/src/core/hle/service/hid/controllers/debug_pad.cpp +++ b/src/core/hle/service/hid/controllers/debug_pad.cpp @@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {} void Controller_DebugPad::OnRelease() {} -void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = Core::Timing::GetTicks(); +void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) { + shared_memory.header.timestamp = core_timing.GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/debug_pad.h b/src/core/hle/service/hid/controllers/debug_pad.h index 68b734248..929035034 100644 --- a/src/core/hle/service/hid/controllers/debug_pad.h +++ b/src/core/hle/service/hid/controllers/debug_pad.h @@ -26,7 +26,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, std::size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp index 6d21f1a7d..a179252e3 100644 --- a/src/core/hle/service/hid/controllers/gesture.cpp +++ b/src/core/hle/service/hid/controllers/gesture.cpp @@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {} void Controller_Gesture::OnRelease() {} -void Controller_Gesture::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = Core::Timing::GetTicks(); +void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) { + shared_memory.header.timestamp = core_timing.GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/gesture.h b/src/core/hle/service/hid/controllers/gesture.h index 
1056ffbcd..f305fe90f 100644 --- a/src/core/hle/service/hid/controllers/gesture.h +++ b/src/core/hle/service/hid/controllers/gesture.h @@ -22,7 +22,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp index 798f30436..92d7bfb52 100644 --- a/src/core/hle/service/hid/controllers/keyboard.cpp +++ b/src/core/hle/service/hid/controllers/keyboard.cpp @@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {} void Controller_Keyboard::OnRelease() {} -void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = Core::Timing::GetTicks(); +void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) { + shared_memory.header.timestamp = core_timing.GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/keyboard.h b/src/core/hle/service/hid/controllers/keyboard.h index f52775456..73cd2c7bb 100644 --- a/src/core/hle/service/hid/controllers/keyboard.h +++ b/src/core/hle/service/hid/controllers/keyboard.h @@ -25,7 +25,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, std::size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp index 4985037be..11ab096d9 100644 --- a/src/core/hle/service/hid/controllers/mouse.cpp +++ b/src/core/hle/service/hid/controllers/mouse.cpp @@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default; void Controller_Mouse::OnInit() {} void Controller_Mouse::OnRelease() {} -void Controller_Mouse::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = Core::Timing::GetTicks(); +void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) { + shared_memory.header.timestamp = core_timing.GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/mouse.h b/src/core/hle/service/hid/controllers/mouse.h index 70b654d07..9d46eecbe 100644 --- a/src/core/hle/service/hid/controllers/mouse.h +++ b/src/core/hle/service/hid/controllers/mouse.h @@ -24,7 +24,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, std::size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index ffdd1c593..e7fc7a619 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { rstick_entry.y = static_cast<s32>(stick_r_y_f * 
HID_JOYSTICK_MAX); } -void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) { +void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t data_len) { if (!IsControllerActivated()) return; for (std::size_t i = 0; i < shared_memory_entries.size(); i++) { @@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) { const auto& last_entry = main_controller->npad[main_controller->common.last_entry_index]; - main_controller->common.timestamp = Core::Timing::GetTicks(); + main_controller->common.timestamp = core_timing.GetTicks(); main_controller->common.last_entry_index = (main_controller->common.last_entry_index + 1) % 17; diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 106cf58c8..18c7a94e6 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -30,7 +30,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, std::size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp index cca4dca1d..946948f5e 100644 --- a/src/core/hle/service/hid/controllers/stubbed.cpp +++ b/src/core/hle/service/hid/controllers/stubbed.cpp @@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {} void Controller_Stubbed::OnRelease() {} -void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) { +void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) { if (!smart_update) { return; } CommonHeader header{}; - header.timestamp = Core::Timing::GetTicks(); + header.timestamp = core_timing.GetTicks(); header.total_entry_count = 17; header.entry_count = 0; header.last_entry_index = 0; diff --git a/src/core/hle/service/hid/controllers/stubbed.h b/src/core/hle/service/hid/controllers/stubbed.h index 4a21c643e..24469f03e 100644 --- a/src/core/hle/service/hid/controllers/stubbed.h +++ b/src/core/hle/service/hid/controllers/stubbed.h @@ -20,7 +20,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, std::size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp index a7c8acc72..1a8445a43 100644 --- a/src/core/hle/service/hid/controllers/touchscreen.cpp +++ b/src/core/hle/service/hid/controllers/touchscreen.cpp @@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {} void Controller_Touchscreen::OnRelease() {} -void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) { - shared_memory.header.timestamp = Core::Timing::GetTicks(); +void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) { + shared_memory.header.timestamp = core_timing.GetTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { @@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) { touch_entry.diameter_x = 
Settings::values.touchscreen.diameter_x; touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; - const u64 tick = Core::Timing::GetTicks(); + const u64 tick = core_timing.GetTicks(); touch_entry.delta_time = tick - last_touch; last_touch = tick; touch_entry.finger = Settings::values.touchscreen.finger; diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h index 94cd0eba9..012b6e0dd 100644 --- a/src/core/hle/service/hid/controllers/touchscreen.h +++ b/src/core/hle/service/hid/controllers/touchscreen.h @@ -24,7 +24,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, std::size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp index eff03d14e..1a9da9576 100644 --- a/src/core/hle/service/hid/controllers/xpad.cpp +++ b/src/core/hle/service/hid/controllers/xpad.cpp @@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {} void Controller_XPad::OnRelease() {} -void Controller_XPad::OnUpdate(u8* data, std::size_t size) { +void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, + std::size_t size) { for (auto& xpad_entry : shared_memory.shared_memory_entries) { - xpad_entry.header.timestamp = Core::Timing::GetTicks(); + xpad_entry.header.timestamp = core_timing.GetTicks(); xpad_entry.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/xpad.h b/src/core/hle/service/hid/controllers/xpad.h index ff836989f..2864e6617 100644 --- a/src/core/hle/service/hid/controllers/xpad.h +++ b/src/core/hle/service/hid/controllers/xpad.h @@ -22,7 +22,7 @@ public: void OnRelease() override; // When the controller is requesting an update for the shared memory - void OnUpdate(u8* data, std::size_t size) override; + void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override; // Called when input devices should be loaded void OnLoadInputDevices() override; diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 79c320d04..8a6de83a2 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -73,13 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") { GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000); // Register update callbacks - pad_update_event = Core::Timing::RegisterEvent( - "HID::UpdatePadCallback", - [this](u64 userdata, int cycles_late) { UpdateControllers(userdata, cycles_late); }); + auto& core_timing = Core::System::GetInstance().CoreTiming(); + pad_update_event = + core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) { + UpdateControllers(userdata, cycles_late); + }); // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) 
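// --- Illustrative sketch, not part of the commit -----------------------------
// The RegisterEvent call above is the instance-based pattern this change set
// converts to. A minimal self-rescheduling periodic event, assuming only the
// CoreTiming API visible in this diff (RegisterEvent / ScheduleEvent /
// UnscheduleEvent); "PeriodicSketch" and "period_ticks" are hypothetical names.
struct PeriodicSketch {
    PeriodicSketch(Core::Timing::CoreTiming& core_timing, u64 period_ticks)
        : core_timing{core_timing}, period_ticks{period_ticks} {
        event = core_timing.RegisterEvent(
            "PeriodicSketch", [this](u64 userdata, int cycles_late) {
                // ... periodic work (e.g. polling input devices) would go here ...
                // Subtract the lateness so the average period stays constant,
                // the same compensation UpdateControllers() applies below.
                this->core_timing.ScheduleEvent(this->period_ticks - cycles_late, event);
            });
        core_timing.ScheduleEvent(period_ticks, event); // schedule the first firing
    }
    ~PeriodicSketch() {
        core_timing.UnscheduleEvent(event, 0); // mirror the destructor cleanup below
    }

    Core::Timing::CoreTiming& core_timing;
    u64 period_ticks;
    Core::Timing::EventType* event = nullptr;
};
// -----------------------------------------------------------------------------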
- Core::Timing::ScheduleEvent(pad_update_ticks, pad_update_event); + core_timing.ScheduleEvent(pad_update_ticks, pad_update_event); ReloadInputDevices(); } @@ -93,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) { } IAppletResource ::~IAppletResource() { - Core::Timing::UnscheduleEvent(pad_update_event, 0); + Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0); } void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { @@ -105,15 +107,17 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { } void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) { + auto& core_timing = Core::System::GetInstance().CoreTiming(); + const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); for (const auto& controller : controllers) { if (should_reload) { controller->OnLoadInputDevices(); } - controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE); + controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE); } - Core::Timing::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); + core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); } class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index b427d4068..2c4625c99 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 5}; rb.Push(RESULT_SUCCESS); - rb.PushRaw<u64>(Core::Timing::GetTicks()); + rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks()); rb.PushRaw<u32>(0); } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 88d80ba06..45812d238 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -5,6 +5,7 @@ #include <cstring> #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" @@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o IoctlGetGpuTime params{}; std::memcpy(&params, input.data(), input.size()); - params.gpu_time = Core::Timing::cyclesToNs(Core::Timing::GetTicks()); + params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks()); std::memcpy(output.data(), &params, output.size()); return 0; } diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index ce1b59860..56f31e2ac 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -14,11 +14,12 @@ #include "core/core_timing_util.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/readable_event.h" -#include "core/hle/kernel/writable_event.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "core/hle/service/nvflinger/nvflinger.h" +#include "core/hle/service/vi/display/vi_display.h" +#include "core/hle/service/vi/layer/vi_layer.h" #include "core/perf_stats.h" #include "video_core/renderer_base.h" @@ -27,19 
+28,25 @@ namespace Service::NVFlinger { constexpr std::size_t SCREEN_REFRESH_RATE = 60; constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); -NVFlinger::NVFlinger() { +NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} { + displays.emplace_back(0, "Default"); + displays.emplace_back(1, "External"); + displays.emplace_back(2, "Edid"); + displays.emplace_back(3, "Internal"); + displays.emplace_back(4, "Null"); + // Schedule the screen composition events composition_event = - Core::Timing::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { + core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { Compose(); - Core::Timing::ScheduleEvent(frame_ticks - cycles_late, composition_event); + this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event); }); - Core::Timing::ScheduleEvent(frame_ticks, composition_event); + core_timing.ScheduleEvent(frame_ticks, composition_event); } NVFlinger::~NVFlinger() { - Core::Timing::UnscheduleEvent(composition_event, 0); + core_timing.UnscheduleEvent(composition_event, 0); } void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { @@ -52,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { // TODO(Subv): Currently we only support the Default display. ASSERT(name == "Default"); - const auto itr = std::find_if(displays.begin(), displays.end(), - [&](const Display& display) { return display.name == name; }); + const auto itr = + std::find_if(displays.begin(), displays.end(), + [&](const VI::Display& display) { return display.GetName() == name; }); if (itr == displays.end()) { return {}; } - return itr->id; + return itr->GetID(); } std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { @@ -68,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { return {}; } - ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment"); - const u64 layer_id = next_layer_id++; const u32 buffer_queue_id = next_buffer_queue_id++; - auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); - display->layers.emplace_back(layer_id, buffer_queue); - buffer_queues.emplace_back(std::move(buffer_queue)); + buffer_queues.emplace_back(buffer_queue_id, layer_id); + display->CreateLayer(layer_id, buffer_queues.back()); return layer_id; } @@ -85,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co return {}; } - return layer->buffer_queue->GetId(); + return layer->GetBufferQueue().GetId(); } Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { @@ -95,20 +100,29 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i return nullptr; } - return display->vsync_event.readable; + return display->GetVSyncEvent(); +} + +BufferQueue& NVFlinger::FindBufferQueue(u32 id) { + const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), + [id](const auto& queue) { return queue.GetId() == id; }); + + ASSERT(itr != buffer_queues.end()); + return *itr; } -std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { +const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const { const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), - [&](const auto& queue) { return queue->GetId() == id; }); + [id](const auto& queue) { return queue.GetId() == id; }); ASSERT(itr != buffer_queues.end()); return 
*itr; } -Display* NVFlinger::FindDisplay(u64 display_id) { - const auto itr = std::find_if(displays.begin(), displays.end(), - [&](const Display& display) { return display.id == display_id; }); +VI::Display* NVFlinger::FindDisplay(u64 display_id) { + const auto itr = + std::find_if(displays.begin(), displays.end(), + [&](const VI::Display& display) { return display.GetID() == display_id; }); if (itr == displays.end()) { return nullptr; @@ -117,9 +131,10 @@ Display* NVFlinger::FindDisplay(u64 display_id) { return &*itr; } -const Display* NVFlinger::FindDisplay(u64 display_id) const { - const auto itr = std::find_if(displays.begin(), displays.end(), - [&](const Display& display) { return display.id == display_id; }); +const VI::Display* NVFlinger::FindDisplay(u64 display_id) const { + const auto itr = + std::find_if(displays.begin(), displays.end(), + [&](const VI::Display& display) { return display.GetID() == display_id; }); if (itr == displays.end()) { return nullptr; @@ -128,57 +143,41 @@ const Display* NVFlinger::FindDisplay(u64 display_id) const { return &*itr; } -Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) { +VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) { auto* const display = FindDisplay(display_id); if (display == nullptr) { return nullptr; } - const auto itr = std::find_if(display->layers.begin(), display->layers.end(), - [&](const Layer& layer) { return layer.id == layer_id; }); - - if (itr == display->layers.end()) { - return nullptr; - } - - return &*itr; + return display->FindLayer(layer_id); } -const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { +const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { const auto* const display = FindDisplay(display_id); if (display == nullptr) { return nullptr; } - const auto itr = std::find_if(display->layers.begin(), display->layers.end(), - [&](const Layer& layer) { return layer.id == layer_id; }); - - if (itr == display->layers.end()) { - return nullptr; - } - - return &*itr; + return display->FindLayer(layer_id); } void NVFlinger::Compose() { for (auto& display : displays) { // Trigger vsync for this display at the end of drawing - SCOPE_EXIT({ display.vsync_event.writable->Signal(); }); + SCOPE_EXIT({ display.SignalVSyncEvent(); }); // Don't do anything for displays without layers. - if (display.layers.empty()) + if (!display.HasLayers()) continue; // TODO(Subv): Support more than 1 layer. 
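// --- Illustrative sketch, not part of the commit -----------------------------
// The FindDisplay/FindLayer/FindBufferQueue helpers above all reduce to the
// same idiom once displays, layers, and buffer queues are plain value types in
// a std::vector: std::find_if, then a raw pointer or nullptr. A self-contained
// toy version; "Item" and "FindItem" are hypothetical stand-ins for
// VI::Display and VI::Layer.
#include <algorithm>
#include <cstdint>
#include <vector>

struct Item {
    std::uint64_t id;
};

Item* FindItem(std::vector<Item>& items, std::uint64_t id) {
    const auto itr = std::find_if(items.begin(), items.end(),
                                  [id](const Item& item) { return item.id == id; });
    return itr != items.end() ? &*itr : nullptr;
}
// -----------------------------------------------------------------------------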
- ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported"); - - Layer& layer = display.layers[0]; - auto& buffer_queue = layer.buffer_queue; + VI::Layer& layer = display.GetLayer(0); + auto& buffer_queue = layer.GetBufferQueue(); // Search for a queued buffer and acquire it - auto buffer = buffer_queue->AcquireBuffer(); + auto buffer = buffer_queue.AcquireBuffer(); MicroProfileFlip(); @@ -203,19 +202,8 @@ void NVFlinger::Compose() { igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->get().transform, buffer->get().crop_rect); - buffer_queue->ReleaseBuffer(buffer->get().slot); + buffer_queue.ReleaseBuffer(buffer->get().slot); } } -Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {} -Layer::~Layer() = default; - -Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) { - auto& kernel = Core::System::GetInstance().Kernel(); - vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, - fmt::format("Display VSync Event {}", id)); -} - -Display::~Display() = default; - } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 6d8bcbd30..c0a83fffb 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -4,7 +4,6 @@ #pragma once -#include <array> #include <memory> #include <optional> #include <string> @@ -15,8 +14,9 @@ #include "core/hle/kernel/object.h" namespace Core::Timing { +class CoreTiming; struct EventType; -} +} // namespace Core::Timing namespace Kernel { class ReadableEvent; @@ -25,34 +25,20 @@ class WritableEvent; namespace Service::Nvidia { class Module; -} +} // namespace Service::Nvidia + +namespace Service::VI { +class Display; +class Layer; +} // namespace Service::VI namespace Service::NVFlinger { class BufferQueue; -struct Layer { - Layer(u64 id, std::shared_ptr<BufferQueue> queue); - ~Layer(); - - u64 id; - std::shared_ptr<BufferQueue> buffer_queue; -}; - -struct Display { - Display(u64 id, std::string name); - ~Display(); - - u64 id; - std::string name; - - std::vector<Layer> layers; - Kernel::EventPair vsync_event; -}; - class NVFlinger final { public: - NVFlinger(); + explicit NVFlinger(Core::Timing::CoreTiming& core_timing); ~NVFlinger(); /// Sets the NVDrv module instance to use to send buffers to the GPU. @@ -79,7 +65,10 @@ public: Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; /// Obtains a buffer queue identified by the ID. - std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; + BufferQueue& FindBufferQueue(u32 id); + + /// Obtains a buffer queue identified by the ID. + const BufferQueue& FindBufferQueue(u32 id) const; /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when /// finished. @@ -87,27 +76,21 @@ public: private: /// Finds the display identified by the specified ID. - Display* FindDisplay(u64 display_id); + VI::Display* FindDisplay(u64 display_id); /// Finds the display identified by the specified ID. - const Display* FindDisplay(u64 display_id) const; + const VI::Display* FindDisplay(u64 display_id) const; /// Finds the layer identified by the specified ID in the desired display. - Layer* FindLayer(u64 display_id, u64 layer_id); + VI::Layer* FindLayer(u64 display_id, u64 layer_id); /// Finds the layer identified by the specified ID in the desired display. 
- const Layer* FindLayer(u64 display_id, u64 layer_id) const; + const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const; std::shared_ptr<Nvidia::Module> nvdrv; - std::array<Display, 5> displays{{ - {0, "Default"}, - {1, "External"}, - {2, "Edid"}, - {3, "Internal"}, - {4, "Null"}, - }}; - std::vector<std::shared_ptr<BufferQueue>> buffer_queues; + std::vector<VI::Display> displays; + std::vector<BufferQueue> buffer_queues; /// Id to use for the next layer that is created, this counter is shared among all displays. u64 next_layer_id = 1; @@ -117,6 +100,9 @@ private: /// Event that handles screen composition. Core::Timing::EventType* composition_event; + + /// Core timing instance for registering/unregistering the composition event. + Core::Timing::CoreTiming& core_timing; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index d25b80ab0..117f87a45 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -194,10 +194,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co // Module interface /// Initialize ServiceManager -void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) { +void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, + FileSys::VfsFilesystem& vfs) { // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it // here and pass it into the respective InstallInterfaces functions. - auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(); + auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming()); SM::ServiceManager::InstallInterfaces(sm); diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index 029533628..830790269 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h @@ -14,6 +14,14 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// // Namespace Service +namespace Core { +class System; +} + +namespace FileSys { +class VfsFilesystem; +} + namespace Kernel { class ClientPort; class ServerPort; @@ -21,10 +29,6 @@ class ServerSession; class HLERequestContext; } // namespace Kernel -namespace FileSys { -class VfsFilesystem; -} - namespace Service { namespace SM { @@ -178,7 +182,8 @@ private: }; /// Initialize ServiceManager -void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs); +void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, + FileSys::VfsFilesystem& vfs); /// Shutdown ServiceManager void Shutdown(); diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index efebd1b24..aa115935d 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -5,6 +5,7 @@ #include <chrono> #include <ctime> #include "common/logging/log.h" +#include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/ipc_helpers.h" @@ -106,8 +107,9 @@ private: void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); + const auto& core_timing = Core::System::GetInstance().CoreTiming(); const SteadyClockTimePoint steady_clock_time_point{ - Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000}; + Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000}; IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; rb.Push(RESULT_SUCCESS); rb.PushRaw(steady_clock_time_point); @@ 
-281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) { return; } + const auto& core_timing = Core::System::GetInstance().CoreTiming(); const SteadyClockTimePoint steady_clock_time_point{ - Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000, {}}; + Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}}; CalendarTime calendar_time{}; calendar_time.year = tm->tm_year + 1900; diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp new file mode 100644 index 000000000..01d80311b --- /dev/null +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -0,0 +1,71 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <utility> + +#include <fmt/format.h> + +#include "common/assert.h" +#include "core/core.h" +#include "core/hle/kernel/readable_event.h" +#include "core/hle/service/vi/display/vi_display.h" +#include "core/hle/service/vi/layer/vi_layer.h" + +namespace Service::VI { + +Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} { + auto& kernel = Core::System::GetInstance().Kernel(); + vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky, + fmt::format("Display VSync Event {}", id)); +} + +Display::~Display() = default; + +Layer& Display::GetLayer(std::size_t index) { + return layers.at(index); +} + +const Layer& Display::GetLayer(std::size_t index) const { + return layers.at(index); +} + +Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const { + return vsync_event.readable; +} + +void Display::SignalVSyncEvent() { + vsync_event.writable->Signal(); +} + +void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) { + // TODO(Subv): Support more than 1 layer. + ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment"); + + layers.emplace_back(id, buffer_queue); +} + +Layer* Display::FindLayer(u64 id) { + const auto itr = std::find_if(layers.begin(), layers.end(), + [id](const VI::Layer& layer) { return layer.GetID() == id; }); + + if (itr == layers.end()) { + return nullptr; + } + + return &*itr; +} + +const Layer* Display::FindLayer(u64 id) const { + const auto itr = std::find_if(layers.begin(), layers.end(), + [id](const VI::Layer& layer) { return layer.GetID() == id; }); + + if (itr == layers.end()) { + return nullptr; + } + + return &*itr; +} + +} // namespace Service::VI diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h new file mode 100644 index 000000000..2acd46ff8 --- /dev/null +++ b/src/core/hle/service/vi/display/vi_display.h @@ -0,0 +1,98 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <vector> + +#include "common/common_types.h" +#include "core/hle/kernel/writable_event.h" + +namespace Service::NVFlinger { +class BufferQueue; +} + +namespace Service::VI { + +class Layer; + +/// Represents a single display type +class Display { +public: + /// Constructs a display with a given unique ID and name. + /// + /// @param id The unique ID for this display. + /// @param name The name for this display. 
+ /// + Display(u64 id, std::string name); + ~Display(); + + Display(const Display&) = delete; + Display& operator=(const Display&) = delete; + + Display(Display&&) = default; + Display& operator=(Display&&) = default; + + /// Gets the unique ID assigned to this display. + u64 GetID() const { + return id; + } + + /// Gets the name of this display + const std::string& GetName() const { + return name; + } + + /// Whether or not this display has any layers added to it. + bool HasLayers() const { + return !layers.empty(); + } + + /// Gets a layer for this display based off an index. + Layer& GetLayer(std::size_t index); + + /// Gets a layer for this display based off an index. + const Layer& GetLayer(std::size_t index) const; + + /// Gets the readable vsync event. + Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const; + + /// Signals the internal vsync event. + void SignalVSyncEvent(); + + /// Creates and adds a layer to this display with the given ID. + /// + /// @param id The ID to assign to the created layer. + /// @param buffer_queue The buffer queue for the layer instance to use. + /// + void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue); + + /// Attempts to find a layer with the given ID. + /// + /// @param id The layer ID. + /// + /// @returns If found, the Layer instance with the given ID. + /// If not found, then nullptr is returned. + /// + Layer* FindLayer(u64 id); + + /// Attempts to find a layer with the given ID. + /// + /// @param id The layer ID. + /// + /// @returns If found, the Layer instance with the given ID. + /// If not found, then nullptr is returned. + /// + const Layer* FindLayer(u64 id) const; + +private: + u64 id; + std::string name; + + std::vector<Layer> layers; + Kernel::EventPair vsync_event; +}; + +} // namespace Service::VI diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp new file mode 100644 index 000000000..954225c26 --- /dev/null +++ b/src/core/hle/service/vi/layer/vi_layer.cpp @@ -0,0 +1,13 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/hle/service/vi/layer/vi_layer.h" + +namespace Service::VI { + +Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {} + +Layer::~Layer() = default; + +} // namespace Service::VI diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h new file mode 100644 index 000000000..c6bfd01f6 --- /dev/null +++ b/src/core/hle/service/vi/layer/vi_layer.h @@ -0,0 +1,52 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Service::NVFlinger { +class BufferQueue; +} + +namespace Service::VI { + +/// Represents a single display layer. +class Layer { +public: + /// Constructs a layer with a given ID and buffer queue. + /// + /// @param id The ID to assign to this layer. + /// @param queue The buffer queue for this layer to use. + /// + Layer(u64 id, NVFlinger::BufferQueue& queue); + ~Layer(); + + Layer(const Layer&) = delete; + Layer& operator=(const Layer&) = delete; + + Layer(Layer&&) = default; + Layer& operator=(Layer&&) = delete; + + /// Gets the ID for this layer. + u64 GetID() const { + return id; + } + + /// Gets a reference to the buffer queue this layer is using. 
+ NVFlinger::BufferQueue& GetBufferQueue() { + return buffer_queue; + } + + /// Gets a const reference to the buffer queue this layer is using. + const NVFlinger::BufferQueue& GetBufferQueue() const { + return buffer_queue; + } + +private: + u64 id; + NVFlinger::BufferQueue& buffer_queue; +}; + +} // namespace Service::VI diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index a317a2885..7369a09ec 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -525,7 +525,7 @@ private: LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, static_cast<u32>(transaction), flags); - auto buffer_queue = nv_flinger->FindBufferQueue(id); + auto& buffer_queue = nv_flinger->FindBufferQueue(id); if (transaction == TransactionId::Connect) { IGBPConnectRequestParcel request{ctx.ReadBuffer()}; @@ -538,7 +538,7 @@ private: } else if (transaction == TransactionId::SetPreallocatedBuffer) { IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; - buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer); + buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); IGBPSetPreallocatedBufferResponseParcel response{}; ctx.WriteBuffer(response.Serialize()); @@ -546,7 +546,7 @@ private: IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; const u32 width{request.data.width}; const u32 height{request.data.height}; - std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); + std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); if (slot) { // Buffer is available @@ -559,8 +559,8 @@ private: [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) { // Repeat TransactParcel DequeueBuffer when a buffer is available - auto buffer_queue = nv_flinger->FindBufferQueue(id); - std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); + auto& buffer_queue = nv_flinger->FindBufferQueue(id); + std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); IGBPDequeueBufferResponseParcel response{*slot}; @@ -568,28 +568,28 @@ private: IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); }, - buffer_queue->GetWritableBufferWaitEvent()); + buffer_queue.GetWritableBufferWaitEvent()); } } else if (transaction == TransactionId::RequestBuffer) { IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; - auto& buffer = buffer_queue->RequestBuffer(request.slot); + auto& buffer = buffer_queue.RequestBuffer(request.slot); IGBPRequestBufferResponseParcel response{buffer}; ctx.WriteBuffer(response.Serialize()); } else if (transaction == TransactionId::QueueBuffer) { IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; - buffer_queue->QueueBuffer(request.data.slot, request.data.transform, - request.data.GetCropRect()); + buffer_queue.QueueBuffer(request.data.slot, request.data.transform, + request.data.GetCropRect()); IGBPQueueBufferResponseParcel response{1280, 720}; ctx.WriteBuffer(response.Serialize()); } else if (transaction == TransactionId::Query) { IGBPQueryRequestParcel request{ctx.ReadBuffer()}; - u32 value = - buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); + const u32 value = + buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); IGBPQueryResponseParcel response{value}; ctx.WriteBuffer(response.Serialize()); @@ -629,12 +629,12 @@ private: LOG_WARNING(Service_VI, "(STUBBED) called 
id={}, unknown={:08X}", id, unknown); - const auto buffer_queue = nv_flinger->FindBufferQueue(id); + const auto& buffer_queue = nv_flinger->FindBufferQueue(id); // TODO(Subv): Find out what this actually is. IPC::ResponseBuilder rb{ctx, 2, 1}; rb.Push(RESULT_SUCCESS); - rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent()); + rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent()); } std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index 77607a755..340d6a272 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) { REQUIRE(lateness == cycles_late); } -class ScopeInit final { -public: +struct ScopeInit final { ScopeInit() { - Core::Timing::Init(); + core_timing.Initialize(); } ~ScopeInit() { - Core::Timing::Shutdown(); + core_timing.Shutdown(); } + + Core::Timing::CoreTiming core_timing; }; -static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0, - int cpu_downcount = 0) { +static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount, + int expected_lateness = 0, int cpu_downcount = 0) { callbacks_ran_flags = 0; expected_callback = CB_IDS[idx]; lateness = expected_lateness; // Pretend we executed X cycles of instructions. - Core::Timing::AddTicks(Core::Timing::GetDowncount() - cpu_downcount); - Core::Timing::Advance(); + core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount); + core_timing.Advance(); REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); - REQUIRE(downcount == Core::Timing::GetDowncount()); + REQUIRE(downcount == core_timing.GetDowncount()); } TEST_CASE("CoreTiming[BasicOrder]", "[core]") { ScopeInit guard; + auto& core_timing = guard.core_timing; - Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); - Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>); - Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>); - Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>); + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>); + Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>); // Enter slice 0 - Core::Timing::Advance(); + core_timing.Advance(); // D -> B -> C -> A -> E - Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]); - REQUIRE(1000 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEvent(500, cb_b, CB_IDS[1]); - REQUIRE(500 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEvent(800, cb_c, CB_IDS[2]); - REQUIRE(500 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEvent(100, cb_d, CB_IDS[3]); - REQUIRE(100 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEvent(1200, cb_e, CB_IDS[4]); - REQUIRE(100 == Core::Timing::GetDowncount()); - - AdvanceAndCheck(3, 400); - AdvanceAndCheck(1, 300); - AdvanceAndCheck(2, 200); - AdvanceAndCheck(0, 200); - AdvanceAndCheck(4, 
MAX_SLICE_LENGTH); + core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); + REQUIRE(1000 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]); + REQUIRE(500 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]); + REQUIRE(500 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]); + REQUIRE(100 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]); + REQUIRE(100 == core_timing.GetDowncount()); + + AdvanceAndCheck(core_timing, 3, 400); + AdvanceAndCheck(core_timing, 1, 300); + AdvanceAndCheck(core_timing, 2, 200); + AdvanceAndCheck(core_timing, 0, 200); + AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH); } TEST_CASE("CoreTiming[Threadsave]", "[core]") { ScopeInit guard; + auto& core_timing = guard.core_timing; - Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); - Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>); - Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>); - Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>); + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>); + Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>); // Enter slice 0 - Core::Timing::Advance(); + core_timing.Advance(); // D -> B -> C -> A -> E - Core::Timing::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]); + core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]); // Manually force since ScheduleEventThreadsafe doesn't call it - Core::Timing::ForceExceptionCheck(1000); - REQUIRE(1000 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]); + core_timing.ForceExceptionCheck(1000); + REQUIRE(1000 == core_timing.GetDowncount()); + core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]); // Manually force since ScheduleEventThreadsafe doesn't call it - Core::Timing::ForceExceptionCheck(500); - REQUIRE(500 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]); + core_timing.ForceExceptionCheck(500); + REQUIRE(500 == core_timing.GetDowncount()); + core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]); // Manually force since ScheduleEventThreadsafe doesn't call it - Core::Timing::ForceExceptionCheck(800); - REQUIRE(500 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]); + core_timing.ForceExceptionCheck(800); + REQUIRE(500 == core_timing.GetDowncount()); + core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]); // Manually force since ScheduleEventThreadsafe doesn't call it - Core::Timing::ForceExceptionCheck(100); - REQUIRE(100 == Core::Timing::GetDowncount()); - Core::Timing::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]); + core_timing.ForceExceptionCheck(100); + REQUIRE(100 == core_timing.GetDowncount()); + core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]); // Manually force since ScheduleEventThreadsafe doesn't call it - 
Core::Timing::ForceExceptionCheck(1200); - REQUIRE(100 == Core::Timing::GetDowncount()); - - AdvanceAndCheck(3, 400); - AdvanceAndCheck(1, 300); - AdvanceAndCheck(2, 200); - AdvanceAndCheck(0, 200); - AdvanceAndCheck(4, MAX_SLICE_LENGTH); + core_timing.ForceExceptionCheck(1200); + REQUIRE(100 == core_timing.GetDowncount()); + + AdvanceAndCheck(core_timing, 3, 400); + AdvanceAndCheck(core_timing, 1, 300); + AdvanceAndCheck(core_timing, 2, 200); + AdvanceAndCheck(core_timing, 0, 200); + AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH); } namespace SharedSlotTest { @@ -142,59 +145,62 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") { using namespace SharedSlotTest; ScopeInit guard; + auto& core_timing = guard.core_timing; - Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", FifoCallback<0>); - Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", FifoCallback<1>); - Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", FifoCallback<2>); - Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", FifoCallback<3>); - Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", FifoCallback<4>); + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>); + Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>); + Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>); + Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>); + Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>); - Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]); - Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]); - Core::Timing::ScheduleEvent(1000, cb_c, CB_IDS[2]); - Core::Timing::ScheduleEvent(1000, cb_d, CB_IDS[3]); - Core::Timing::ScheduleEvent(1000, cb_e, CB_IDS[4]); + core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); + core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]); + core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]); + core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]); + core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]); // Enter slice 0 - Core::Timing::Advance(); - REQUIRE(1000 == Core::Timing::GetDowncount()); + core_timing.Advance(); + REQUIRE(1000 == core_timing.GetDowncount()); callbacks_ran_flags = 0; counter = 0; lateness = 0; - Core::Timing::AddTicks(Core::Timing::GetDowncount()); - Core::Timing::Advance(); - REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount()); + core_timing.AddTicks(core_timing.GetDowncount()); + core_timing.Advance(); + REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount()); REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); } TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { ScopeInit guard; + auto& core_timing = guard.core_timing; - Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); // Enter slice 0 - Core::Timing::Advance(); + core_timing.Advance(); - Core::Timing::ScheduleEvent(100, cb_a, CB_IDS[0]); - Core::Timing::ScheduleEvent(200, cb_b, CB_IDS[1]); + core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]); + core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]); - AdvanceAndCheck(0, 90, 10, -10); // (100 - 
10) - AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50); + AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10) + AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50); } namespace ChainSchedulingTest { static int reschedules = 0; -static void RescheduleCallback(u64 userdata, s64 cycles_late) { +static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata, + s64 cycles_late) { --reschedules; REQUIRE(reschedules >= 0); REQUIRE(lateness == cycles_late); if (reschedules > 0) { - Core::Timing::ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata), - userdata); + core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata), + userdata); } } } // namespace ChainSchedulingTest @@ -203,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") { using namespace ChainSchedulingTest; ScopeInit guard; + auto& core_timing = guard.core_timing; - Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>); - Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>); - Core::Timing::EventType* cb_rs = - Core::Timing::RegisterEvent("callbackReschedule", RescheduleCallback); + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_rs = core_timing.RegisterEvent( + "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) { + RescheduleCallback(core_timing, userdata, cycles_late); + }); // Enter slice 0 - Core::Timing::Advance(); + core_timing.Advance(); - Core::Timing::ScheduleEvent(800, cb_a, CB_IDS[0]); - Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]); - Core::Timing::ScheduleEvent(2200, cb_c, CB_IDS[2]); - Core::Timing::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); - REQUIRE(800 == Core::Timing::GetDowncount()); + core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]); + core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]); + core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]); + core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs)); + REQUIRE(800 == core_timing.GetDowncount()); reschedules = 3; - AdvanceAndCheck(0, 200); // cb_a - AdvanceAndCheck(1, 1000); // cb_b, cb_rs + AdvanceAndCheck(core_timing, 0, 200); // cb_a + AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs REQUIRE(2 == reschedules); - Core::Timing::AddTicks(Core::Timing::GetDowncount()); - Core::Timing::Advance(); // cb_rs + core_timing.AddTicks(core_timing.GetDowncount()); + core_timing.Advance(); // cb_rs REQUIRE(1 == reschedules); - REQUIRE(200 == Core::Timing::GetDowncount()); + REQUIRE(200 == core_timing.GetDowncount()); - AdvanceAndCheck(2, 800); // cb_c + AdvanceAndCheck(core_timing, 2, 800); // cb_c - Core::Timing::AddTicks(Core::Timing::GetDowncount()); - Core::Timing::Advance(); // cb_rs + core_timing.AddTicks(core_timing.GetDowncount()); + core_timing.Advance(); // cb_rs REQUIRE(0 == reschedules); - REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount()); + REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount()); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1db0d031d..6036d6ed3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -101,6 
+101,22 @@ add_library(video_core STATIC video_core.h ) +if (ENABLE_VULKAN) + target_sources(video_core PRIVATE + renderer_vulkan/declarations.h + renderer_vulkan/vk_device.cpp + renderer_vulkan/vk_device.h + renderer_vulkan/vk_memory_manager.cpp + renderer_vulkan/vk_memory_manager.h + renderer_vulkan/vk_resource_manager.cpp + renderer_vulkan/vk_resource_manager.h + renderer_vulkan/vk_scheduler.cpp + renderer_vulkan/vk_scheduler.h) + + target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) + target_compile_definitions(video_core PRIVATE HAS_VULKAN) +endif() + create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index eb9bf1878..669541b4b 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() { } bool DmaPusher::Step() { - if (dma_get != dma_put) { - // Push buffer non-empty, read a word - const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); - ASSERT_MSG(address, "Invalid GPU address"); + if (!ib_enable || dma_pushbuffer.empty()) { + // pushbuffer empty and IB empty or nonexistent - nothing to do + return false; + } - const CommandHeader command_header{Memory::Read32(*address)}; + const CommandList& command_list{dma_pushbuffer.front()}; + const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; + GPUVAddr dma_get = command_list_header.addr; + GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); + bool non_main = command_list_header.is_non_main; - dma_get += sizeof(u32); + if (dma_pushbuffer_subindex >= command_list.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } - if (!non_main) { - dma_mget = dma_get; - } + if (command_list_header.size == 0) { + return true; + } + + // Push buffer non-empty, read a word + const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); + ASSERT_MSG(address, "Invalid GPU address"); + + command_headers.resize(command_list_header.size); + + Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32)); + + for (const CommandHeader& command_header : command_headers) { // now, see if we're in the middle of a command if (dma_state.length_pending) { @@ -91,22 +109,11 @@ bool DmaPusher::Step() { break; } } - } else if (ib_enable && !dma_pushbuffer.empty()) { - // Current pushbuffer empty, but we have more IB entries to read - const CommandList& command_list{dma_pushbuffer.front()}; - const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; - dma_get = command_list_header.addr; - dma_put = dma_get + command_list_header.size * sizeof(u32); - non_main = command_list_header.is_non_main; - - if (dma_pushbuffer_subindex >= command_list.size()) { - // We've gone through the current list, remove it from the queue - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - } - } else { - // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do - return {}; + } + + if (!non_main) { + // TODO (degasus): This is dead code, as dma_mget is never read. 
+ dma_mget = dma_put; } return true; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1097e5c49..27a36348c 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -75,6 +75,8 @@ private: GPU& gpu; + std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once + std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer @@ -89,11 +91,8 @@ private: DmaState dma_state{}; bool dma_increment_once{}; - GPUVAddr dma_put{}; ///< pushbuffer current end address - GPUVAddr dma_get{}; ///< pushbuffer current read address GPUVAddr dma_mget{}; ///< main pushbuffer last read address bool ib_enable{true}; ///< IB mode enabled - bool non_main{}; ///< non-main pushbuffer active }; } // namespace Tegra diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 5c1029ddf..4f6126116 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" #include "core/memory.h" @@ -11,9 +12,9 @@ namespace Tegra::Engines { -KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer, +KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), rasterizer{rasterizer} {} + : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {} KeplerMemory::~KeplerMemory() = default; @@ -50,7 +51,7 @@ void KeplerMemory::ProcessData(u32 data) { rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); Memory::Write32(*dest_address, data); - Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); state.write_offset++; } diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index fe9ebc5b9..f680c2ad9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -5,13 +5,16 @@ #pragma once #include <array> -#include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" +namespace Core { +class System; +} + namespace VideoCore { class RasterizerInterface; } @@ -23,7 +26,8 @@ namespace Tegra::Engines { class KeplerMemory final { public: - KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); + KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager); ~KeplerMemory(); /// Write the value to the register identified by method. @@ -76,6 +80,7 @@ public: } state{}; private: + Core::System& system; MemoryManager& memory_manager; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 19b6b14b2..2d2136067 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -19,8 +19,10 @@ namespace Tegra::Engines { /// First register id that is actually a Macro call. 
constexpr u32 MacroRegistersStart = 0xE00; -Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) { +Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager) + : memory_manager(memory_manager), system{system}, rasterizer{rasterizer}, + macro_interpreter(*this) { InitializeRegisterDefaults(); } @@ -103,7 +105,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { } void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { - auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); + auto debug_context = system.GetGPUDebugContext(); // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. @@ -317,7 +319,7 @@ void Maxwell3D::ProcessQueryGet() { LongQueryResult query_result{}; query_result.value = result; // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = Core::Timing::GetTicks(); + query_result.timestamp = system.CoreTiming().GetTicks(); Memory::WriteBlock(*address, &query_result, sizeof(query_result)); } dirty_flags.OnMemoryWrite(); @@ -334,7 +336,7 @@ void Maxwell3D::DrawArrays() { regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); - auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); + auto debug_context = system.GetGPUDebugContext(); if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1f76aa670..0e3873ffd 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,6 +17,10 @@ #include "video_core/memory_manager.h" #include "video_core/textures/texture.h" +namespace Core { +class System; +} + namespace VideoCore { class RasterizerInterface; } @@ -28,7 +32,8 @@ namespace Tegra::Engines { class Maxwell3D final { public: - explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); + explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager); ~Maxwell3D() = default; /// Register structure of the Maxwell3D engine. @@ -1131,6 +1136,8 @@ public: private: void InitializeRegisterDefaults(); + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; /// Start offsets of each macro in macro_memory diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index d6c41a5ae..529a14ec7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/assert.h" #include "core/core.h" #include "core/memory.h" #include "video_core/engines/maxwell_3d.h" @@ -11,8 +12,9 @@ namespace Tegra::Engines { -MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), rasterizer{rasterizer} {} +MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager) + : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {} void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method_call.method < Regs::NUM_REGS, @@ -59,7 +61,7 @@ void MaxwellDMA::HandleCopy() { } // All copies here update the main memory, so mark all rasterizer states as invalid. - Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 1f8cd65d2..cf75aeb12 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -5,13 +5,16 @@ #pragma once #include <array> -#include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" +namespace Core { +class System; +} + namespace VideoCore { class RasterizerInterface; } @@ -20,7 +23,8 @@ namespace Tegra::Engines { class MaxwellDMA final { public: - explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); + explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager); ~MaxwellDMA() = default; /// Write the value to the register identified by method. @@ -137,6 +141,8 @@ public: MemoryManager& memory_manager; private: + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; /// Performs the copy from the source buffer to the destination buffer as configured in the diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 3d00c308b..ac30d1a89 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "common/assert.h" +#include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" @@ -27,14 +28,14 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { UNREACHABLE(); } -GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { +GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { memory_manager = std::make_unique<Tegra::MemoryManager>(); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); - maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); + maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); - maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager); - kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager); + maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); + kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); } GPU::~GPU() = default; @@ -283,7 +284,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = Core::Timing::GetTicks(); + block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); Memory::WriteBlock(*address, &block, sizeof(block)); } else { const auto address = diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a482196ea..0f5bfdcbf 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -6,12 +6,15 @@ #include <array> #include <memory> -#include <vector> #include "common/common_types.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/dma_pusher.h" #include "video_core/memory_manager.h" +namespace Core { +class System; +} + namespace VideoCore { class RasterizerInterface; } @@ -118,7 +121,7 @@ enum class EngineID { class GPU final { public: - explicit GPU(VideoCore::RasterizerInterface& rasterizer); + explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); ~GPU(); struct MethodCall { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 59f671048..e6d47ce41 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -423,7 +423,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, for (u32 i = 0; i < params.depth; i++) { MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), params.tile_width_spacing, 1, + params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, gl_buffer.data() + offset_gl, gl_size, params.addr + offset); offset += layer_size; offset_gl += gl_size; @@ -1257,7 +1257,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - FastLayeredCopySurface(old_surface, new_surface); + if (old_params.pixel_format == new_params.pixel_format) + FastLayeredCopySurface(old_surface, new_surface); + else { + AccurateCopySurface(old_surface, new_surface); + } break; default: 
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index b81882d04..89d733c50 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -36,7 +36,6 @@ using PixelFormat = VideoCore::Surface::PixelFormat; using ComponentType = VideoCore::Surface::ComponentType; struct SurfaceParams { - enum class SurfaceClass { Uploaded, RenderTarget, @@ -169,20 +168,27 @@ struct SurfaceParams { } u32 MipBlockDepth(u32 mip_level) const { - if (mip_level == 0) + if (mip_level == 0) { return block_depth; - if (is_layered) + } + + if (is_layered) { return 1; - u32 depth = MipDepth(mip_level); + } + + const u32 mip_depth = MipDepth(mip_level); u32 bd = 32; - while (bd > 1 && depth * 2 <= bd) { + while (bd > 1 && mip_depth * 2 <= bd) { bd >>= 1; } + if (bd == 32) { - u32 bh = MipBlockHeight(mip_level); - if (bh >= 4) + const u32 bh = MipBlockHeight(mip_level); + if (bh >= 4) { return 16; + } } + return bd; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index f4140624e..72ff6ac6a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -606,17 +606,8 @@ private: std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { std::string value = VisitOperand(operation, operand_index); - switch (type) { - case Type::Bool: - case Type::Bool2: - case Type::Float: - return value; - case Type::Int: - return "ftoi(" + value + ')'; - case Type::Uint: - return "ftou(" + value + ')'; - case Type::HalfFloat: + case Type::HalfFloat: { const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); if (!half_meta) { value = "toHalf2(" + value + ')'; @@ -633,6 +624,26 @@ private: return "vec2(toHalf2(" + value + ")[1])"; } } + default: + return CastOperand(value, type); + } + } + + std::string CastOperand(const std::string& value, Type type) const { + switch (type) { + case Type::Bool: + case Type::Bool2: + case Type::Float: + return value; + case Type::Int: + return "ftoi(" + value + ')'; + case Type::Uint: + return "ftou(" + value + ')'; + case Type::HalfFloat: + // Can't be handled as a stand-alone value + UNREACHABLE(); + return value; + } UNREACHABLE(); return value; } @@ -640,6 +651,7 @@ private: std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { switch (type) { case Type::Bool: + case Type::Bool2: case Type::Float: if (needs_parenthesis) { return '(' + value + ')'; @@ -711,7 +723,7 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - const auto count = static_cast<u32>(operation.GetOperandsCount()); + const std::size_t count = operation.GetOperandsCount(); const bool has_array = meta->sampler.IsArray(); const bool has_shadow = meta->sampler.IsShadow(); @@ -722,10 +734,10 @@ private: expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 
1 : 0) - 1); expr += '('; - for (u32 i = 0; i < count; ++i) { + for (std::size_t i = 0; i < count; ++i) { expr += Visit(operation[i]); - const u32 next = i + 1; + const std::size_t next = i + 1; if (next < count || has_array || has_shadow) expr += ", "; } @@ -1196,25 +1208,26 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); UNIMPLEMENTED_IF(meta->sampler.IsArray()); - UNIMPLEMENTED_IF(!meta->extras.empty()); - - const auto count = static_cast<u32>(operation.GetOperandsCount()); + const std::size_t count = operation.GetOperandsCount(); std::string expr = "texelFetch("; expr += GetSampler(meta->sampler); expr += ", "; - expr += constructors.at(count - 1); + expr += constructors.at(operation.GetOperandsCount() - 1); expr += '('; - for (u32 i = 0; i < count; ++i) { + for (std::size_t i = 0; i < count; ++i) { expr += VisitOperand(operation, i, Type::Int); - - const u32 next = i + 1; + const std::size_t next = i + 1; if (next == count) expr += ')'; - if (next < count) + else if (next < count) expr += ", "; } + for (std::size_t i = 0; i < meta->extras.size(); ++i) { + expr += ", "; + expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); + } expr += ')'; return expr + GetSwizzle(meta->element); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 81af803bc..219f08053 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -11,7 +11,9 @@ namespace OpenGL { OpenGLState OpenGLState::cur_state; + bool OpenGLState::s_rgb_used; + OpenGLState::OpenGLState() { // These all match default OpenGL values geometry_shaders.enabled = false; @@ -112,7 +114,6 @@ void OpenGLState::ApplyDefaultState() { } void OpenGLState::ApplySRgb() const { - // sRGB if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { if (framebuffer_srgb.enabled) { // Track if sRGB is used @@ -125,23 +126,20 @@ void OpenGLState::ApplySRgb() const { } void OpenGLState::ApplyCulling() const { - // Culling - const bool cull_changed = cull.enabled != cur_state.cull.enabled; - if (cull_changed) { + if (cull.enabled != cur_state.cull.enabled) { if (cull.enabled) { glEnable(GL_CULL_FACE); } else { glDisable(GL_CULL_FACE); } } - if (cull.enabled) { - if (cull_changed || cull.mode != cur_state.cull.mode) { - glCullFace(cull.mode); - } - if (cull_changed || cull.front_face != cur_state.cull.front_face) { - glFrontFace(cull.front_face); - } + if (cull.mode != cur_state.cull.mode) { + glCullFace(cull.mode); + } + + if (cull.front_face != cur_state.cull.front_face) { + glFrontFace(cull.front_face); } } @@ -172,72 +170,63 @@ void OpenGLState::ApplyColorMask() const { } void OpenGLState::ApplyDepth() const { - // Depth test - const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled; - if (depth_test_changed) { + if (depth.test_enabled != cur_state.depth.test_enabled) { if (depth.test_enabled) { glEnable(GL_DEPTH_TEST); } else { glDisable(GL_DEPTH_TEST); } } - if (depth.test_enabled && - (depth_test_changed || depth.test_func != cur_state.depth.test_func)) { + + if (depth.test_func != cur_state.depth.test_func) { glDepthFunc(depth.test_func); } - // Depth mask + if (depth.write_mask != cur_state.depth.write_mask) { glDepthMask(depth.write_mask); } } void OpenGLState::ApplyPrimitiveRestart() const { - const bool primitive_restart_changed = - primitive_restart.enabled != cur_state.primitive_restart.enabled; - if (primitive_restart_changed) { + if 
(primitive_restart.enabled != cur_state.primitive_restart.enabled) { if (primitive_restart.enabled) { glEnable(GL_PRIMITIVE_RESTART); } else { glDisable(GL_PRIMITIVE_RESTART); } } - if (primitive_restart_changed || - (primitive_restart.enabled && - primitive_restart.index != cur_state.primitive_restart.index)) { + + if (primitive_restart.index != cur_state.primitive_restart.index) { glPrimitiveRestartIndex(primitive_restart.index); } } void OpenGLState::ApplyStencilTest() const { - const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled; - if (stencil_test_changed) { + if (stencil.test_enabled != cur_state.stencil.test_enabled) { if (stencil.test_enabled) { glEnable(GL_STENCIL_TEST); } else { glDisable(GL_STENCIL_TEST); } } - if (stencil.test_enabled) { - auto config_stencil = [stencil_test_changed](GLenum face, const auto& config, - const auto& prev_config) { - if (stencil_test_changed || config.test_func != prev_config.test_func || - config.test_ref != prev_config.test_ref || - config.test_mask != prev_config.test_mask) { - glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); - } - if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail || - config.action_depth_pass != prev_config.action_depth_pass || - config.action_stencil_fail != prev_config.action_stencil_fail) { - glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, - config.action_depth_pass); - } - if (config.write_mask != prev_config.write_mask) { - glStencilMaskSeparate(face, config.write_mask); - } - }; - config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front); - config_stencil(GL_BACK, stencil.back, cur_state.stencil.back); - } + + const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) { + if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || + config.test_mask != prev_config.test_mask) { + glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); + } + if (config.action_depth_fail != prev_config.action_depth_fail || + config.action_depth_pass != prev_config.action_depth_pass || + config.action_stencil_fail != prev_config.action_stencil_fail) { + glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, + config.action_depth_pass); + } + if (config.write_mask != prev_config.write_mask) { + glStencilMaskSeparate(face, config.write_mask); + } + }; + ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); + ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); } // Viewport does not affects glClearBuffer so emulate viewport using scissor test void OpenGLState::EmulateViewportWithScissor() { @@ -278,19 +267,18 @@ void OpenGLState::ApplyViewport() const { updated.depth_range_far != current.depth_range_far) { glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); } - const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; - if (scissor_changed) { + + if (updated.scissor.enabled != current.scissor.enabled) { if (updated.scissor.enabled) { glEnablei(GL_SCISSOR_TEST, i); } else { glDisablei(GL_SCISSOR_TEST, i); } } - if (updated.scissor.enabled && - (scissor_changed || updated.scissor.x != current.scissor.x || - updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height)) { + + if (updated.scissor.x != current.scissor.x || updated.scissor.y != 
current.scissor.y || + updated.scissor.width != current.scissor.width || + updated.scissor.height != current.scissor.height) { glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, updated.scissor.height); } @@ -302,22 +290,23 @@ void OpenGLState::ApplyViewport() const { updated.height != current.height) { glViewport(updated.x, updated.y, updated.width, updated.height); } + if (updated.depth_range_near != current.depth_range_near || updated.depth_range_far != current.depth_range_far) { glDepthRange(updated.depth_range_near, updated.depth_range_far); } - const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; - if (scissor_changed) { + + if (updated.scissor.enabled != current.scissor.enabled) { if (updated.scissor.enabled) { glEnable(GL_SCISSOR_TEST); } else { glDisable(GL_SCISSOR_TEST); } } - if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x || - updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height)) { + + if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || + updated.scissor.width != current.scissor.width || + updated.scissor.height != current.scissor.height) { glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, updated.scissor.height); } @@ -327,8 +316,7 @@ void OpenGLState::ApplyViewport() const { void OpenGLState::ApplyGlobalBlending() const { const Blend& current = cur_state.blend[0]; const Blend& updated = blend[0]; - const bool blend_changed = updated.enabled != current.enabled; - if (blend_changed) { + if (updated.enabled != current.enabled) { if (updated.enabled) { glEnable(GL_BLEND); } else { @@ -338,15 +326,14 @@ void OpenGLState::ApplyGlobalBlending() const { if (!updated.enabled) { return; } - if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + if (updated.src_rgb_func != current.src_rgb_func || updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) { glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (blend_changed || updated.rgb_equation != current.rgb_equation || - updated.a_equation != current.a_equation) { + if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); } } @@ -354,26 +341,22 @@ void OpenGLState::ApplyGlobalBlending() const { void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { const Blend& updated = blend[target]; const Blend& current = cur_state.blend[target]; - const bool blend_changed = updated.enabled != current.enabled || force; - if (blend_changed) { + if (updated.enabled != current.enabled || force) { if (updated.enabled) { glEnablei(GL_BLEND, static_cast<GLuint>(target)); } else { glDisablei(GL_BLEND, static_cast<GLuint>(target)); } } - if (!updated.enabled) { - return; - } - if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + + if (updated.src_rgb_func != current.src_rgb_func || updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) { glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (blend_changed || updated.rgb_equation != current.rgb_equation 
|| - updated.a_equation != current.a_equation) { + if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, updated.a_equation); } @@ -397,8 +380,7 @@ void OpenGLState::ApplyBlending() const { } void OpenGLState::ApplyLogicOp() const { - const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled; - if (logic_op_changed) { + if (logic_op.enabled != cur_state.logic_op.enabled) { if (logic_op.enabled) { glEnable(GL_COLOR_LOGIC_OP); } else { @@ -406,14 +388,12 @@ void OpenGLState::ApplyLogicOp() const { } } - if (logic_op.enabled && - (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) { + if (logic_op.operation != cur_state.logic_op.operation) { glLogicOp(logic_op.operation); } } void OpenGLState::ApplyPolygonOffset() const { - const bool fill_enable_changed = polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; const bool line_enable_changed = @@ -448,9 +428,7 @@ void OpenGLState::ApplyPolygonOffset() const { } } - if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) && - (factor_changed || units_changed || clamp_changed)) { - + if (factor_changed || units_changed || clamp_changed) { if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); } else { @@ -528,9 +506,9 @@ void OpenGLState::ApplyDepthClamp() const { depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { return; } - if (depth_clamp.far_plane != depth_clamp.near_plane) { - UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!"); - } + UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, + "Unimplemented Depth Clamp Separation!"); + if (depth_clamp.far_plane || depth_clamp.near_plane) { glEnable(GL_DEPTH_CLAMP); } else { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cca2ed708..272fc2e8e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -137,7 +137,7 @@ void RendererOpenGL::SwapBuffers( render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(Core::Timing::GetGlobalTimeUs()); + system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); system.GetPerfStats().BeginSystemFrame(); // Restore the rasterizer state @@ -380,7 +380,8 @@ void RendererOpenGL::CaptureScreenshot() { GLuint renderbuffer; glGenRenderbuffers(1, &renderbuffer); glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); - glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height); + glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width, + layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); DrawScreen(layout); diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h new file mode 100644 index 000000000..ba25b5bc7 --- /dev/null +++ b/src/video_core/renderer_vulkan/declarations.h @@ -0,0 +1,45 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
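+
+// Because every Vulkan entry point is resolved at runtime through
+// vk::DispatchLoaderDynamic, unique handles must carry that loader so they
+// destroy themselves through it as well, e.g. (illustrative):
+//     UniqueFence fence = dev.createFenceUnique(fence_ci, nullptr, dld);
+// destroys itself through dld instead of the statically linked entry points.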
+ +#pragma once + +#include <vulkan/vulkan.hpp> + +namespace Vulkan { + +// vulkan.hpp unique handlers use DispatchLoaderStatic +template <typename T> +using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>; + +using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>; +using UniqueBuffer = UniqueHandle<vk::Buffer>; +using UniqueBufferView = UniqueHandle<vk::BufferView>; +using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>; +using UniqueCommandPool = UniqueHandle<vk::CommandPool>; +using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>; +using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>; +using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>; +using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>; +using UniqueDevice = UniqueHandle<vk::Device>; +using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>; +using UniqueEvent = UniqueHandle<vk::Event>; +using UniqueFence = UniqueHandle<vk::Fence>; +using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; +using UniqueImage = UniqueHandle<vk::Image>; +using UniqueImageView = UniqueHandle<vk::ImageView>; +using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; +using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; +using UniquePipeline = UniqueHandle<vk::Pipeline>; +using UniquePipelineCache = UniqueHandle<vk::PipelineCache>; +using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>; +using UniqueQueryPool = UniqueHandle<vk::QueryPool>; +using UniqueRenderPass = UniqueHandle<vk::RenderPass>; +using UniqueSampler = UniqueHandle<vk::Sampler>; +using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; +using UniqueSemaphore = UniqueHandle<vk::Semaphore>; +using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; +using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; +using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp new file mode 100644 index 000000000..78a4e5f0e --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -0,0 +1,231 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
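+
+// Note on the format fallback tables: the Alternatives:: arrays below are
+// sentinel-terminated, so GetSupportedFormat() walks the candidates until it
+// reaches the trailing vk::Format{} (eUndefined) entry.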
+ +#include <map> +#include <optional> +#include <set> +#include <vector> +#include "common/assert.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" + +namespace Vulkan { + +namespace Alternatives { + +constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { + vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; +constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { + vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; + +} // namespace Alternatives + +constexpr const vk::Format* GetFormatAlternatives(vk::Format format) { + switch (format) { + case vk::Format::eD24UnormS8Uint: + return Alternatives::Depth24UnormS8Uint.data(); + case vk::Format::eD16UnormS8Uint: + return Alternatives::Depth16UnormS8Uint.data(); + default: + return nullptr; + } +} + +constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, + FormatType format_type) { + switch (format_type) { + case FormatType::Linear: + return properties.linearTilingFeatures; + case FormatType::Optimal: + return properties.optimalTilingFeatures; + case FormatType::Buffer: + return properties.bufferFeatures; + default: + return {}; + } +} + +VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface) + : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { + SetupFamilies(dldi, surface); + SetupProperties(dldi); +} + +VKDevice::~VKDevice() = default; + +bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { + const auto queue_cis = GetDeviceQueueCreateInfos(); + vk::PhysicalDeviceFeatures device_features{}; + + const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), + 0, nullptr, static_cast<u32>(extensions.size()), + extensions.data(), &device_features); + vk::Device dummy_logical; + if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); + return false; + } + + dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); + logical = UniqueDevice( + dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); + + graphics_queue = logical->getQueue(graphics_family, 0, dld); + present_queue = logical->getQueue(present_family, 0, dld); + return true; +} + +vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, + vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const { + if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { + return wanted_format; + } + // The wanted format is not supported by hardware, search for alternatives + const vk::Format* alternatives = GetFormatAlternatives(wanted_format); + if (alternatives == nullptr) { + LOG_CRITICAL(Render_Vulkan, + "Format={} with usage={} and type={} has no defined alternatives and host " + "hardware does not support it", + static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + static_cast<u32>(format_type)); + UNREACHABLE(); + return wanted_format; + } + + std::size_t i = 0; + for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; + alternative = alternatives[++i]) { + if (!IsFormatSupported(alternative, wanted_usage, format_type)) + continue; + LOG_WARNING(Render_Vulkan, + "Emulating format={} with alternative format={} with 
usage={} and type={}",
+ static_cast<u32>(wanted_format), static_cast<u32>(alternative),
+ static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
+ return alternative;
+ }
+
+ // No alternatives found, panic
+ LOG_CRITICAL(Render_Vulkan,
+ "Format={} with usage={} and type={} is not supported by the host hardware and "
+ "none of its alternatives are supported either",
+ static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+ static_cast<u32>(format_type));
+ UNREACHABLE();
+ return wanted_format;
+}
+
+bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+ FormatType format_type) const {
+ const auto it = format_properties.find(wanted_format);
+ if (it == format_properties.end()) {
+ LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
+ static_cast<u32>(wanted_format));
+ UNREACHABLE();
+ return true;
+ }
+ const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
+ return (supported_usage & wanted_usage) == wanted_usage;
+}
+
+bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+ vk::SurfaceKHR surface) {
+ const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
+
+ bool has_swapchain{};
+ for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+ has_swapchain |= prop.extensionName == swapchain_extension;
+ }
+ if (!has_swapchain) {
+ // The device doesn't support creating swapchains.
+ return false;
+ }
+
+ bool has_graphics{}, has_present{};
+ const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
+ for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
+ const auto& family = queue_family_properties[i];
+ if (family.queueCount == 0)
+ continue;
+
+ has_graphics |=
+ (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
+ has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
+ }
+ if (!has_graphics || !has_present) {
+ // The device doesn't have both a graphics and a present queue.
+ return false;
+ }
+
+ // TODO(Rodrigo): Check if the device matches all requirements.
+ const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
+ if (props.limits.maxUniformBufferRange < 65536) {
+ return false;
+ }
+
+ // Device is suitable.
+ return true; +} + +void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { + std::optional<u32> graphics_family_, present_family_; + + const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { + if (graphics_family_ && present_family_) + break; + + const auto& queue_family = queue_family_properties[i]; + if (queue_family.queueCount == 0) + continue; + + if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) + graphics_family_ = i; + if (physical.getSurfaceSupportKHR(i, surface, dldi)) + present_family_ = i; + } + ASSERT(graphics_family_ && present_family_); + + graphics_family = *graphics_family_; + present_family = *present_family_; +} + +void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { + const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); + device_type = props.deviceType; + uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); +} + +std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { + static const float QUEUE_PRIORITY = 1.f; + + std::set<u32> unique_queue_families = {graphics_family, present_family}; + std::vector<vk::DeviceQueueCreateInfo> queue_cis; + + for (u32 queue_family : unique_queue_families) + queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY}); + + return queue_cis; +} + +std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( + const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + std::map<vk::Format, vk::FormatProperties> format_properties; + + const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { + format_properties.emplace(format, physical.getFormatProperties(format, dldi)); + }; + AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); + AddFormatQuery(vk::Format::eR5G6B5UnormPack16); + AddFormatQuery(vk::Format::eD32Sfloat); + AddFormatQuery(vk::Format::eD16UnormS8Uint); + AddFormatQuery(vk::Format::eD24UnormS8Uint); + AddFormatQuery(vk::Format::eD32SfloatS8Uint); + + return format_properties; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h new file mode 100644 index 000000000..e87c7a508 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -0,0 +1,116 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <vector> +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +/// Format usage descriptor +enum class FormatType { Linear, Optimal, Buffer }; + +/// Handles data specific to a physical device. +class VKDevice final { +public: + explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface); + ~VKDevice(); + + /// Initializes the device. Returns true on success. + bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); + + /** + * Returns a format supported by the device for the passed requeriments. + * @param wanted_format The ideal format to be returned. It may not be the returned format. + * @param wanted_usage The usage that must be fulfilled even if the format is not supported. + * @param format_type Format type usage. + * @returns A format supported by the device. 
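+ * @note For example, asking for eD24UnormS8Uint with depth-stencil usage may
+ * legitimately return eD32SfloatS8Uint (and then eD16UnormS8Uint) on hardware
+ * that lacks native support, per the Alternatives:: tables.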
+ */ + vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const; + + /// Returns the dispatch loader with direct function pointers of the device + const vk::DispatchLoaderDynamic& GetDispatchLoader() const { + return dld; + } + + /// Returns the logical device + vk::Device GetLogical() const { + return logical.get(); + } + + /// Returns the physical device. + vk::PhysicalDevice GetPhysical() const { + return physical; + } + + /// Returns the main graphics queue. + vk::Queue GetGraphicsQueue() const { + return graphics_queue; + } + + /// Returns the main present queue. + vk::Queue GetPresentQueue() const { + return present_queue; + } + + /// Returns main graphics queue family index. + u32 GetGraphicsFamily() const { + return graphics_family; + } + + /// Returns main present queue family index. + u32 GetPresentFamily() const { + return present_family; + } + + /// Returns if the device is integrated with the host CPU + bool IsIntegrated() const { + return device_type == vk::PhysicalDeviceType::eIntegratedGpu; + } + + /// Returns uniform buffer alignment requeriment + u64 GetUniformBufferAlignment() const { + return uniform_buffer_alignment; + } + + /// Checks if the physical device is suitable. + static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + vk::SurfaceKHR surface); + +private: + /// Sets up queue families. + void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); + + /// Sets up device properties. + void SetupProperties(const vk::DispatchLoaderDynamic& dldi); + + /// Returns a list of queue initialization descriptors. + std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + + /// Returns true if a format is supported. + bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + FormatType format_type) const; + + /// Returns the device properties for Vulkan formats. + static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( + const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); + + const vk::PhysicalDevice physical; ///< Physical device + vk::DispatchLoaderDynamic dld; ///< Device function pointers + UniqueDevice logical; ///< Logical device + vk::Queue graphics_queue; ///< Main graphics queue + vk::Queue present_queue; ///< Main present queue + u32 graphics_family{}; ///< Main graphics queue family index + u32 present_family{}; ///< Main present queue family index + vk::PhysicalDeviceType device_type; ///< Physical device type + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment + std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp new file mode 100644 index 000000000..17ee93b91 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -0,0 +1,252 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
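+
+// Sub-allocation scheme (sketch): device memory is grabbed in fixed 64 MiB
+// chunks (VKMemoryAllocation) and commits are carved out of them; a commit
+// frees its range when it goes out of scope. Typical use from a caller:
+//
+//     VKMemoryManager memory_manager(device);
+//     const VKMemoryCommit commit = memory_manager.Commit(buffer, true);
+//     std::memcpy(commit->GetData(), data, size);
+//
+// (Illustrative only; "buffer", "data" and "size" stand for caller state.)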
+
+#include <algorithm>
+#include <optional>
+#include <tuple>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+// TODO(Rodrigo): Fine tune this number
+constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+
+class VKMemoryAllocation final {
+public:
+ explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
+ vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
+ : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
+ shifted_type{ShiftType(type)}, is_mappable{properties &
+ vk::MemoryPropertyFlagBits::eHostVisible} {
+ if (is_mappable) {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
+ }
+ }
+
+ ~VKMemoryAllocation() {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ if (is_mappable)
+ dev.unmapMemory(memory, dld);
+ dev.free(memory, nullptr, dld);
+ }
+
+ VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
+ auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
+ static_cast<u64>(alignment));
+ if (!found) {
+ found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
+ static_cast<u64>(alignment));
+ if (!found) {
+ // Signal out of memory; the caller is expected to allocate a new chunk and retry.
+ return nullptr;
+ }
+ }
+ u8* address = is_mappable ? base_address + *found : nullptr;
+ auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
+ *found + commit_size);
+ commits.push_back(commit.get());
+
+ // The region right after the last commit is likely to be free.
+ free_iterator = *found + commit_size;
+
+ return commit;
+ }
+
+ void Free(const VKMemoryCommitImpl* commit) {
+ ASSERT(commit);
+ const auto it =
+ std::find_if(commits.begin(), commits.end(),
+ [&](const auto& stored_commit) { return stored_commit == commit; });
+ if (it == commits.end()) {
+ LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
+ UNREACHABLE();
+ return;
+ }
+ commits.erase(it);
+ }
+
+ /// Returns whether this allocation is compatible with the arguments.
+ bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
+ return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
+ (type_mask & shifted_type) != 0;
+ }
+
+private:
+ static constexpr u32 ShiftType(u32 type) {
+ return 1U << type;
+ }
+
+ /// Searches for a free region between "start" and "end" that satisfies the requested
+ /// size and alignment.
+ std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
+ u64 iterator = start;
+ while (iterator + size < end) {
+ const u64 try_left = Common::AlignUp(iterator, alignment);
+ const u64 try_right = try_left + size;
+
+ bool overlap = false;
+ for (const auto& commit : commits) {
+ const auto [commit_left, commit_right] = commit->interval;
+ if (try_left < commit_right && commit_left < try_right) {
+ // There's an overlap, continue the search where the overlapping commit ends.
+ iterator = commit_right;
+ overlap = true;
+ break;
+ }
+ }
+ if (!overlap) {
+ // A free address has been found.
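+ // try_left is aligned and its [try_left, try_left + size) range overlaps
+ // no live commit, so it can be handed out directly.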
+ return try_left;
+ }
+ }
+ // No free regions were found, return an empty optional.
+ return std::nullopt;
+ }
+
+ const VKDevice& device; ///< Vulkan device.
+ const vk::DeviceMemory memory; ///< Vulkan memory allocation handle.
+ const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
+ const u64 alloc_size; ///< Size of this allocation.
+ const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
+ const bool is_mappable; ///< Whether the allocation is mappable.
+
+ /// Base address of the mapped pointer.
+ u8* base_address{};
+
+ /// Hints where the next free region is likely going to be.
+ u64 free_iterator{};
+
+ /// Stores all commits done from this allocation.
+ std::vector<const VKMemoryCommitImpl*> commits;
+};
+
+VKMemoryManager::VKMemoryManager(const VKDevice& device)
+ : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
+ is_memory_unified{GetMemoryUnified(props)} {}
+
+VKMemoryManager::~VKMemoryManager() = default;
+
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
+ ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
+
+ // When a host visible commit is requested, search for host visible and coherent memory;
+ // otherwise search for a fast device local type.
+ const vk::MemoryPropertyFlags wanted_properties =
+ host_visible
+ ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
+ : vk::MemoryPropertyFlagBits::eDeviceLocal;
+
+ const auto TryCommit = [&]() -> VKMemoryCommit {
+ for (auto& alloc : allocs) {
+ if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
+ continue;
+
+ if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
+ return commit;
+ }
+ }
+ return {};
+ };
+
+ if (auto commit = TryCommit(); commit) {
+ return commit;
+ }
+
+ // The commit has failed, allocate more memory.
+ if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
+ // TODO(Rodrigo): Try to use host memory.
+ LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
+ UNREACHABLE();
+ }
+
+ // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
+ // there's a bug.
+ auto commit = TryCommit();
+ ASSERT(commit);
+ return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const auto requirements = dev.getBufferMemoryRequirements(buffer, dld);
+ auto commit = Commit(requirements, host_visible);
+ dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
+ return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const auto requirements = dev.getImageMemoryRequirements(image, dld);
+ auto commit = Commit(requirements, host_visible);
+ dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
+ return commit;
+}
+
+bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
+ u64 size) {
+ const u32 type = [&]() {
+ for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+ const auto flags = props.memoryTypes[type_index].propertyFlags;
+ if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
+ // The type index is allowed by the mask and has the wanted properties.
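+ // (type_mask comes from vk::MemoryRequirements::memoryTypeBits; bit i set
+ // means memory type i is legal for the resource being committed.)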
+ return type_index;
+ }
+ }
+ LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
+ UNREACHABLE();
+ return 0u;
+ }();
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+
+ // Try to allocate the found type.
+ const vk::MemoryAllocateInfo memory_ai(size, type);
+ vk::DeviceMemory memory;
+ if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+ res != vk::Result::eSuccess) {
+ LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
+ return false;
+ }
+ allocs.push_back(
+ std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
+ return true;
+}
+
+/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
+ for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
+ if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+ // Memory is considered unified only when all heaps are device local.
+ return false;
+ }
+ }
+ return true;
+}
+
+VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
+ u8* data, u64 begin, u64 end)
+ : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
+
+VKMemoryCommitImpl::~VKMemoryCommitImpl() {
+ allocation->Free(this);
+}
+
+u8* VKMemoryCommitImpl::GetData() const {
+ ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
+ return data;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h new file mode 100644 index 000000000..073597b35 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -0,0 +1,87 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKMemoryAllocation;
+class VKMemoryCommitImpl;
+
+using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
+
+class VKMemoryManager final {
+public:
+ explicit VKMemoryManager(const VKDevice& device);
+ ~VKMemoryManager();
+
+ /**
+ * Commits memory with the specified requirements.
+ * @param reqs Requirements returned from a Vulkan call.
+ * @param host_visible Signals the allocator that it *must* use host visible and coherent
+ * memory. When passing false, it will try to allocate device local memory.
+ * @returns A memory commit.
+ */
+ VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
+
+ /// Commits memory required by the buffer and binds it.
+ VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
+
+ /// Commits memory required by the image and binds it.
+ VKMemoryCommit Commit(vk::Image image, bool host_visible);
+
+ /// Returns true if memory allocations are always done in host visible and coherent memory.
+ bool IsMemoryUnified() const {
+ return is_memory_unified;
+ }
+
+private:
+ /// Allocates a chunk of memory.
+ bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
+
+ /// Returns true if the device uses a unified memory model.
+ static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+
+ const VKDevice& device; ///< Device handler.
+ const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
+ const bool is_memory_unified; ///< True if memory model is unified.
+ std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+};
+
+class VKMemoryCommitImpl final {
+ friend VKMemoryAllocation;
+
+public:
+ explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
+ u64 begin, u64 end);
+ ~VKMemoryCommitImpl();
+
+ /// Returns the writable memory map. The commit has to be mappable.
+ u8* GetData() const;
+
+ /// Returns the Vulkan memory handle.
+ vk::DeviceMemory GetMemory() const {
+ return memory;
+ }
+
+ /// Returns the start position of the commit relative to the allocation.
+ vk::DeviceSize GetOffset() const {
+ return static_cast<vk::DeviceSize>(interval.first);
+ }
+
+private:
+ std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
+ vk::DeviceMemory memory; ///< Vulkan device memory handle.
+ VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
+ u8* data{}; ///< Pointer to the host mapped memory; the commit offset is already applied.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp new file mode 100644 index 000000000..1678463c7 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -0,0 +1,285 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <limits>
+#include <optional>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Vulkan {
+
+// TODO(Rodrigo): Fine tune these numbers.
+constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
+constexpr std::size_t FENCES_GROW_STEP = 0x40;
+
+class CommandBufferPool final : public VKFencedPool {
+public:
+ CommandBufferPool(const VKDevice& device)
+ : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
+
+ void Allocate(std::size_t begin, std::size_t end) override {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const u32 graphics_family = device.GetGraphicsFamily();
+
+ auto pool = std::make_unique<Pool>();
+
+ // Command buffers are going to be committed, recorded and executed every single usage
+ // cycle. They are also going to be reset when committed.
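+ // eTransient hints the driver that the buffers are short-lived, and
+ // eResetCommandBuffer lets each buffer be reset individually when reused.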
+ const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
+ vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
+ const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
+ pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
+
+ const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
+ vk::CommandBufferLevel::ePrimary,
+ static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
+ pool->cmdbufs =
+ dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
+
+ pools.push_back(std::move(pool));
+ }
+
+ vk::CommandBuffer Commit(VKFence& fence) {
+ const std::size_t index = CommitResource(fence);
+ const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
+ const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
+ return *pools[pool_index]->cmdbufs[sub_index];
+ }
+
+private:
+ struct Pool {
+ UniqueCommandPool handle;
+ std::vector<UniqueCommandBuffer> cmdbufs;
+ };
+
+ const VKDevice& device;
+
+ std::vector<std::unique_ptr<Pool>> pools;
+};
+
+VKResource::VKResource() = default;
+
+VKResource::~VKResource() = default;
+
+VKFence::VKFence(const VKDevice& device, UniqueFence handle)
+ : device{device}, handle{std::move(handle)} {}
+
+VKFence::~VKFence() = default;
+
+void VKFence::Wait() {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
+}
+
+void VKFence::Release() {
+ is_owned = false;
+}
+
+void VKFence::Commit() {
+ is_owned = true;
+ is_used = true;
+}
+
+bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
+ if (!is_used) {
+ // If a fence is not used it's always free.
+ return true;
+ }
+ if (is_owned && !owner_wait) {
+ // The fence is still owned (Release has not been called) and an ownership wait has
+ // not been requested.
+ return false;
+ }
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ if (gpu_wait) {
+ // Wait for the fence if it has been requested.
+ dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
+ } else {
+ if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
+ // The Vulkan fence is not ready; there is not much we can do here.
+ return false;
+ }
+ }
+
+ // Notify the protected resources that they are free to be reused.
+ for (auto* resource : protected_resources) {
+ resource->OnFenceRemoval(this);
+ }
+ protected_resources.clear();
+
+ // Prepare the fence for reuse.
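+ // (resetFences returns the fence to the unsignaled state so CommitFence can
+ // hand it out again.)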
+ dev.resetFences({*handle}, dld);
+ is_used = false;
+ return true;
+}
+
+void VKFence::Protect(VKResource* resource) {
+ protected_resources.push_back(resource);
+}
+
+void VKFence::Unprotect(const VKResource* resource) {
+ const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
+ if (it != protected_resources.end()) {
+ protected_resources.erase(it);
+ }
+}
+
+VKFenceWatch::VKFenceWatch() = default;
+
+VKFenceWatch::~VKFenceWatch() {
+ if (fence) {
+ fence->Unprotect(this);
+ }
+}
+
+void VKFenceWatch::Wait() {
+ if (!fence) {
+ return;
+ }
+ fence->Wait();
+ fence->Unprotect(this);
+ fence = nullptr;
+}
+
+void VKFenceWatch::Watch(VKFence& new_fence) {
+ Wait();
+ fence = &new_fence;
+ fence->Protect(this);
+}
+
+bool VKFenceWatch::TryWatch(VKFence& new_fence) {
+ if (fence) {
+ return false;
+ }
+ fence = &new_fence;
+ fence->Protect(this);
+ return true;
+}
+
+void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
+ ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
+ fence = nullptr;
+}
+
+VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
+
+VKFencedPool::~VKFencedPool() = default;
+
+std::size_t VKFencedPool::CommitResource(VKFence& fence) {
+ const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
+ for (std::size_t iterator = begin; iterator < end; ++iterator) {
+ if (watches[iterator]->TryWatch(fence)) {
+ // The resource is now being watched, a free resource was successfully found.
+ return iterator;
+ }
+ }
+ return {};
+ };
+ // Try to find a free resource from the hinted position to the end.
+ auto found = Search(free_iterator, watches.size());
+ if (!found) {
+ // Search from the beginning to the hinted position.
+ found = Search(0, free_iterator);
+ if (!found) {
+ // Both searches failed, the pool is full; handle it.
+ const std::size_t free_resource = ManageOverflow();
+
+ // Watch will wait for the resource to be free.
+ watches[free_resource]->Watch(fence);
+ found = free_resource;
+ }
+ }
+ // The free iterator is hinted to the resource after the one that's just been committed.
+ free_iterator = (*found + 1) % watches.size();
+ return *found;
+}
+
+std::size_t VKFencedPool::ManageOverflow() {
+ const std::size_t old_capacity = watches.size();
+ Grow();
+
+ // The last entry is guaranteed to be free, since it's the first element of the freshly
+ // allocated resources.
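+ // (Grow() appends exactly grow_step new watches, so index old_capacity is
+ // the first of them and cannot be watched yet.)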
+ return old_capacity; +} + +void VKFencedPool::Grow() { + const std::size_t old_capacity = watches.size(); + watches.resize(old_capacity + grow_step); + std::generate(watches.begin() + old_capacity, watches.end(), + []() { return std::make_unique<VKFenceWatch>(); }); + Allocate(old_capacity, old_capacity + grow_step); +} + +VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} { + GrowFences(FENCES_GROW_STEP); + command_buffer_pool = std::make_unique<CommandBufferPool>(device); +} + +VKResourceManager::~VKResourceManager() = default; + +VKFence& VKResourceManager::CommitFence() { + const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* { + const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); }; + const auto hinted = fences.begin() + fences_iterator; + + auto it = std::find_if(hinted, fences.end(), Tick); + if (it == fences.end()) { + it = std::find_if(fences.begin(), hinted, Tick); + if (it == hinted) { + return nullptr; + } + } + fences_iterator = std::distance(fences.begin(), it) + 1; + if (fences_iterator >= fences.size()) + fences_iterator = 0; + + auto& fence = *it; + fence->Commit(); + return fence.get(); + }; + + VKFence* found_fence = StepFences(false, false); + if (!found_fence) { + // Try again, this time waiting. + found_fence = StepFences(true, false); + + if (!found_fence) { + // Allocate new fences and try again. + LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(), + fences.size() + FENCES_GROW_STEP); + + GrowFences(FENCES_GROW_STEP); + found_fence = StepFences(true, false); + ASSERT(found_fence != nullptr); + } + } + return *found_fence; +} + +vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { + return command_buffer_pool->Commit(fence); +} + +void VKResourceManager::GrowFences(std::size_t new_fences_count) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const vk::FenceCreateInfo fence_ci; + + const std::size_t previous_size = fences.size(); + fences.resize(previous_size + new_fences_count); + + std::generate(fences.begin() + previous_size, fences.end(), [&]() { + return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld)); + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h new file mode 100644 index 000000000..5018dfa44 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h @@ -0,0 +1,180 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <memory> +#include <vector> +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKResourceManager; + +class CommandBufferPool; + +/// Interface for a Vulkan resource +class VKResource { +public: + explicit VKResource(); + virtual ~VKResource(); + + /** + * Signals the object that an owning fence has been signaled. + * @param signaling_fence Fence that signals its usage end. + */ + virtual void OnFenceRemoval(VKFence* signaling_fence) = 0; +}; + +/** + * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access. + * They must be commited from the resource manager. 
+/**
+ * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent
+ * access. They must be committed from the resource manager. Their usage flow is: commit the fence
+ * from the resource manager, protect resources with it and use them, send the fence to an
+ * execution queue and Wait for it if needed and then call Release. Used resources will
+ * automatically be signaled when they are free to be reused.
+ * @brief Protects resources for concurrent usage and signals their release.
+ */
+class VKFence {
+    friend class VKResourceManager;
+
+public:
+    explicit VKFence(const VKDevice& device, UniqueFence handle);
+    ~VKFence();
+
+    /**
+     * Waits for the fence to be signaled.
+     * @warning You must have ownership of the fence, and it must have been sent to a queue
+     * before calling this function.
+     */
+    void Wait();
+
+    /**
+     * Releases ownership of the fence. Call this after it has been sent to an execution queue.
+     * Unmanaged usage of the fence after the call results in undefined behavior, because it may
+     * be in use for something else.
+     */
+    void Release();
+
+    /// Protects a resource with this fence.
+    void Protect(VKResource* resource);
+
+    /// Removes protection for a resource.
+    void Unprotect(const VKResource* resource);
+
+    /// Retrieves the fence.
+    operator vk::Fence() const {
+        return *handle;
+    }
+
+private:
+    /// Takes ownership of the fence.
+    void Commit();
+
+    /**
+     * Updates the fence status.
+     * @warning Waiting for the owner might soft lock the execution.
+     * @param gpu_wait Wait for the fence to be signaled by the driver.
+     * @param owner_wait Wait for the owner to signal its freedom.
+     * @returns True if the fence is free. Waiting for both the GPU and the owner will always
+     * return true.
+     */
+    bool Tick(bool gpu_wait, bool owner_wait);
+
+    const VKDevice& device;                       ///< Device handler
+    UniqueFence handle;                           ///< Vulkan fence
+    std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
+    bool is_owned = false; ///< The fence has been committed but not released yet.
+    bool is_used = false;  ///< The fence has been committed but not yet checked to be free.
+};
+
+/**
+ * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
+ * resources without having to inherit VKResource from their handlers.
+ */
+class VKFenceWatch final : public VKResource {
+public:
+    explicit VKFenceWatch();
+    ~VKFenceWatch();
+
+    /// Waits for the fence to be released.
+    void Wait();
+
+    /**
+     * Waits for a previous fence and watches a new one.
+     * @param new_fence New fence to watch.
+     */
+    void Watch(VKFence& new_fence);
+
+    /**
+     * Tries to start watching a fence if the watch is currently free.
+     * @returns True if the watch started, false if it is already watching another fence.
+     */
+    bool TryWatch(VKFence& new_fence);
+
+    void OnFenceRemoval(VKFence* signaling_fence) override;
+
+private:
+    VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
+};
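Putting the documented lifecycle together, a hedged usage sketch. The function, queue and dispatch-loader parameters are illustrative; only the fence, watch and resource-manager APIs come from this header:

void SubmitProtectedWork(VKResourceManager& resource_manager, VKFenceWatch& watch,
                         vk::Queue queue, const vk::DispatchLoaderDynamic& dld) {
    VKFence& fence = resource_manager.CommitFence(); // Take ownership of a free fence.
    watch.Watch(fence);                              // Protect a resource through its watch.

    const vk::CommandBuffer cmdbuf = resource_manager.CommitCommandBuffer(fence);
    // ... begin cmdbuf, record commands and end it here ...

    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &cmdbuf, 0, nullptr);
    queue.submit({submit_info}, fence, dld); // Send the fence to an execution queue.

    fence.Wait();    // Optional: block until the GPU signals the fence.
    fence.Release(); // Give ownership back; protected resources get OnFenceRemoval.
}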
+/**
+ * Handles a pool of resources protected by fences. Manages resource overflow by allocating more
+ * resources.
+ */
+class VKFencedPool {
+public:
+    explicit VKFencedPool(std::size_t grow_step);
+    virtual ~VKFencedPool();
+
+protected:
+    /**
+     * Commits a free resource and protects it with a fence. It may allocate new resources.
+     * @param fence Fence that protects the committed resource.
+     * @returns Index of the committed resource.
+     */
+    std::size_t CommitResource(VKFence& fence);
+
+    /// Called when a chunk of resources has to be allocated.
+    virtual void Allocate(std::size_t begin, std::size_t end) = 0;
+
+private:
+    /// Handles pool overflow by allocating new resources.
+    std::size_t ManageOverflow();
+
+    /// Allocates a new page of resources.
+    void Grow();
+
+    std::size_t grow_step = 0;     ///< Number of new resources created after an overflow
+    std::size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
+};
+
+/**
+ * The resource manager handles all resources that can be protected with a fence, avoiding
+ * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
+ */
+class VKResourceManager final {
+public:
+    explicit VKResourceManager(const VKDevice& device);
+    ~VKResourceManager();
+
+    /// Commits a fence. It has to be sent to a queue and then released.
+    VKFence& CommitFence();
+
+    /// Commits an unused command buffer and protects it with a fence.
+    vk::CommandBuffer CommitCommandBuffer(VKFence& fence);
+
+private:
+    /// Allocates new fences.
+    void GrowFences(std::size_t new_fences_count);
+
+    const VKDevice& device;          ///< Device handler.
+    std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
+    std::vector<std::unique_ptr<VKFence>> fences;           ///< Pool of fences.
+    std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
+};
+
+} // namespace Vulkan
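As an example of extending VKFencedPool, a hypothetical pool of semaphores in the same spirit as the CommandBufferPool used above. SemaphorePool, GROW_STEP and the assumption that declarations.h provides a UniqueSemaphore alias (as it does UniqueFence) are all illustrative:

class SemaphorePool final : public VKFencedPool {
public:
    explicit SemaphorePool(const VKDevice& device) : VKFencedPool(GROW_STEP), device{device} {}

    vk::Semaphore Commit(VKFence& fence) {
        // CommitResource finds (or allocates) a free slot and ties it to the fence.
        const std::size_t index = CommitResource(fence);
        return *semaphores[index];
    }

protected:
    void Allocate(std::size_t begin, std::size_t end) override {
        // Called by Grow() for the freshly added [begin, end) slots.
        semaphores.resize(end);
        for (std::size_t i = begin; i < end; ++i) {
            semaphores[i] = device.GetLogical().createSemaphoreUnique(
                {}, nullptr, device.GetDispatchLoader());
        }
    }

private:
    static constexpr std::size_t GROW_STEP = 0x40;
    const VKDevice& device;
    std::vector<UniqueSemaphore> semaphores;
};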
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
new file mode 100644
index 000000000..f1fea1871
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager} {
+    next_fence = &resource_manager.CommitFence();
+    AllocateNewContext();
+}
+
+VKScheduler::~VKScheduler() = default;
+
+VKExecutionContext VKScheduler::GetExecutionContext() const {
+    return VKExecutionContext(current_fence, current_cmdbuf);
+}
+
+VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore);
+    current_fence->Release();
+    AllocateNewContext();
+    return GetExecutionContext();
+}
+
+VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore);
+    current_fence->Wait();
+    current_fence->Release();
+    AllocateNewContext();
+    return GetExecutionContext();
+}
+
+void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    const auto& dld = device.GetDispatchLoader();
+    current_cmdbuf.end(dld);
+
+    const auto queue = device.GetGraphicsQueue();
+    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf,
+                                     semaphore ? 1u : 0u, &semaphore);
+    queue.submit({submit_info}, *current_fence, dld);
+}
+
+void VKScheduler::AllocateNewContext() {
+    current_fence = next_fence;
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    next_fence = &resource_manager.CommitFence();
+
+    const auto& dld = device.GetDispatchLoader();
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
new file mode 100644
index 000000000..cfaf5376f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKExecutionContext;
+class VKFence;
+class VKResourceManager;
+
+/// The scheduler abstracts command buffer and fence management with an interface that's able to
+/// do OpenGL-like operations on Vulkan command buffers.
+class VKScheduler {
+public:
+    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+    ~VKScheduler();
+
+    /// Gets the current execution context.
+    [[nodiscard]] VKExecutionContext GetExecutionContext() const;
+
+    /// Sends the current execution context to the GPU. It invalidates the current execution
+    /// context and returns a new one.
+    VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
+
+    /// Sends the current execution context to the GPU and waits for it to complete. It
+    /// invalidates the current execution context and returns a new one.
+    VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
+
+private:
+    void SubmitExecution(vk::Semaphore semaphore);
+
+    void AllocateNewContext();
+
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    vk::CommandBuffer current_cmdbuf;
+    VKFence* current_fence = nullptr;
+    VKFence* next_fence = nullptr;
+};
+
+class VKExecutionContext {
+    friend class VKScheduler;
+
+public:
+    VKExecutionContext() = default;
+
+    VKFence& GetFence() const {
+        return *fence;
+    }
+
+    vk::CommandBuffer GetCommandBuffer() const {
+        return cmdbuf;
+    }
+
+private:
+    explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
+        : fence{fence}, cmdbuf{cmdbuf} {}
+
+    VKFence* fence{};
+    vk::CommandBuffer cmdbuf;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 9a1d1de94..38f01ca50 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -429,7 +429,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
 
     if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
-        LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
+        LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
     }
 
     WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
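Finally, back to the scheduler introduced above: a sketch of how a renderer might drive it. Draw and the recorded commands are illustrative; the execution-context flow is the one Flush and Finish document.

void Draw(VKScheduler& scheduler) {
    auto exctx = scheduler.GetExecutionContext();
    [[maybe_unused]] const vk::CommandBuffer cmdbuf = exctx.GetCommandBuffer();

    // Record work here with cmdbuf; resources touched by these commands would
    // be protected with exctx.GetFence() so they outlive GPU execution.

    // Submit and continue with a fresh context. The previous fence/cmdbuf pair
    // is invalidated and must not be used again.
    exctx = scheduler.Flush();
}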