diff options
Diffstat (limited to 'src')
88 files changed, 1236 insertions, 477 deletions
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 201ec7a3c..b2e5d336c 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -46,16 +46,18 @@ struct AudioRendererParameter { u32_le sample_rate; u32_le sample_count; u32_le mix_buffer_count; - u32_le unknown_c; + u32_le submix_count; u32_le voice_count; u32_le sink_count; u32_le effect_count; - u32_le unknown_1c; - u8 unknown_20; - INSERT_PADDING_BYTES(3); + u32_le performance_frame_count; + u8 is_voice_drop_enabled; + u8 unknown_21; + u8 unknown_22; + u8 execution_mode; u32_le splitter_count; - u32_le unknown_2c; - INSERT_PADDING_WORDS(1); + u32_le num_splitter_send_channels; + u32_le unknown_30; u32_le revision; }; static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index dc45dedd3..1da0b9f2a 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp @@ -12,6 +12,10 @@ #include "common/ring_buffer.h" #include "core/settings.h" +#ifdef _MSC_VER +#include <objbase.h> +#endif + namespace AudioCore { class CubebSinkStream final : public SinkStream { @@ -108,6 +112,11 @@ private: }; CubebSink::CubebSink(std::string_view target_device_name) { + // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows +#ifdef _MSC_VER + com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED); +#endif + if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); return; @@ -142,6 +151,12 @@ CubebSink::~CubebSink() { } cubeb_destroy(ctx); + +#ifdef _MSC_VER + if (SUCCEEDED(com_init_result)) { + CoUninitialize(); + } +#endif } SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h index efb9d1634..511df7bb1 100644 --- a/src/audio_core/cubeb_sink.h +++ b/src/audio_core/cubeb_sink.h @@ -25,6 +25,10 @@ private: cubeb* ctx{}; cubeb_devid output_device{}; std::vector<SinkStreamPtr> sink_streams; + +#ifdef _MSC_VER + u32 com_init_result = 0; +#endif }; std::vector<std::string> ListCubebSinkDevices(); diff --git a/src/common/color.h b/src/common/color.h index 0379040be..3a2222077 100644 --- a/src/common/color.h +++ b/src/common/color.h @@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) { /** * Decode a color stored in RGBA8 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) { return {bytes[3], bytes[2], bytes[1], bytes[0]}; } /** * Decode a color stored in RGB8 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) { return {bytes[2], bytes[1], bytes[0], 255}; } /** * Decode a color stored in RG8 (aka HILO8) format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { +inline Common::Vec4<u8> DecodeRG8(const u8* bytes) { return {bytes[1], bytes[0], 0, 255}; } /** * Decode a color stored in RGB565 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) { u16_le pixel; std::memcpy(&pixel, bytes, sizeof(pixel)); return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), @@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { /** * Decode a color stored in RGB5A1 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) { u16_le pixel; std::memcpy(&pixel, bytes, sizeof(pixel)); return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), @@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { /** * Decode a color stored in RGBA4 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) { u16_le pixel; std::memcpy(&pixel, bytes, sizeof(pixel)); return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), @@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) { /** * Decode a depth value and a stencil value stored in D24S8 format * @param bytes Pointer to encoded source values - * @return Resulting values stored as a Math::Vec2 + * @return Resulting values stored as a Common::Vec2 */ -inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { +inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) { return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; } @@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) { bytes[3] = color.r(); bytes[2] = color.g(); bytes[1] = color.b(); @@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) { bytes[2] = color.r(); bytes[1] = color.g(); bytes[0] = color.b(); @@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) { bytes[1] = color.r(); bytes[0] = color.g(); } @@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) { const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); @@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) { const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); @@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) { const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index b369f199f..4462ff3fb 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -39,8 +39,10 @@ public: Impl(Impl const&) = delete; const Impl& operator=(Impl const&) = delete; - void PushEntry(Entry e) { - message_queue.Push(std::move(e)); + void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, + const char* function, std::string message) { + message_queue.Push( + CreateEntry(log_class, log_level, filename, line_num, function, std::move(message))); } void AddBackend(std::unique_ptr<Backend> backend) { @@ -108,11 +110,30 @@ private: backend_thread.join(); } + Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr, + const char* function, std::string message) const { + using std::chrono::duration_cast; + using std::chrono::steady_clock; + + Entry entry; + entry.timestamp = + duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin); + entry.log_class = log_class; + entry.log_level = log_level; + entry.filename = Common::TrimSourcePath(filename); + entry.line_num = line_nr; + entry.function = function; + entry.message = std::move(message); + + return entry; + } + std::mutex writing_mutex; std::thread backend_thread; std::vector<std::unique_ptr<Backend>> backends; Common::MPSCQueue<Log::Entry> message_queue; Filter filter; + std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; }; void ConsoleBackend::Write(const Entry& entry) { @@ -271,25 +292,6 @@ const char* GetLevelName(Level log_level) { #undef LVL } -Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr, - const char* function, std::string message) { - using std::chrono::duration_cast; - using std::chrono::steady_clock; - - static steady_clock::time_point time_origin = steady_clock::now(); - - Entry entry; - entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin); - entry.log_class = log_class; - entry.log_level = log_level; - entry.filename = Common::TrimSourcePath(filename); - entry.line_num = line_nr; - entry.function = function; - entry.message = std::move(message); - - return entry; -} - void SetGlobalFilter(const Filter& filter) { Impl::Instance().SetGlobalFilter(filter); } @@ -314,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, if (!filter.CheckMessage(log_class, log_level)) return; - Entry entry = - CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); - - instance.PushEntry(std::move(entry)); + instance.PushEntry(log_class, log_level, filename, line_num, function, + fmt::vformat(format, args)); } } // namespace Log diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index a31ee6968..fca0267a1 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h @@ -135,10 +135,6 @@ const char* GetLogClassName(Class log_class); */ const char* GetLevelName(Level log_level); -/// Creates a log entry by formatting the given source location, and message. -Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr, - const char* function, std::string message); - /** * The global filter will prevent any messages from even being processed if they are filtered. Each * backend can have a filter, but if the level is lower than the global filter, the backend will diff --git a/src/common/math_util.h b/src/common/math_util.h index 94b4394c5..cff3d48c5 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -7,7 +7,7 @@ #include <cstdlib> #include <type_traits> -namespace MathUtil { +namespace Common { constexpr float PI = 3.14159265f; @@ -41,4 +41,4 @@ struct Rectangle { } }; -} // namespace MathUtil +} // namespace Common diff --git a/src/common/quaternion.h b/src/common/quaternion.h index c528c0b68..370198ae0 100644 --- a/src/common/quaternion.h +++ b/src/common/quaternion.h @@ -6,12 +6,12 @@ #include "common/vector_math.h" -namespace Math { +namespace Common { template <typename T> class Quaternion { public: - Math::Vec3<T> xyz; + Vec3<T> xyz; T w{}; Quaternion<decltype(-T{})> Inverse() const { @@ -38,12 +38,12 @@ public: }; template <typename T> -auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { +auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) { return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); } -inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { +inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) { return {axis * std::sin(angle / 2), std::cos(angle / 2)}; } -} // namespace Math +} // namespace Common diff --git a/src/common/vector_math.h b/src/common/vector_math.h index 8feb49941..429485329 100644 --- a/src/common/vector_math.h +++ b/src/common/vector_math.h @@ -33,7 +33,7 @@ #include <cmath> #include <type_traits> -namespace Math { +namespace Common { template <typename T> class Vec2; @@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) { return MakeVec(x, yzw[0], yzw[1], yzw[2]); } -} // namespace Math +} // namespace Common diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 988356c65..8ccb2d5f0 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -217,6 +217,7 @@ add_library(core STATIC hle/service/audio/audren_u.h hle/service/audio/codecctl.cpp hle/service/audio/codecctl.h + hle/service/audio/errors.h hle/service/audio/hwopus.cpp hle/service/audio/hwopus.h hle/service/bcat/bcat.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index ab7181a05..d741ef90d 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -78,6 +78,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, return vfs->OpenFile(path, FileSys::Mode::Read); } struct System::Impl { + explicit Impl(System& system) : kernel{system} {} Cpu& CurrentCpuCore() { return cpu_core_manager.GetCurrentCore(); @@ -95,7 +96,7 @@ struct System::Impl { LOG_DEBUG(HW_Memory, "initialized OK"); core_timing.Initialize(); - kernel.Initialize(core_timing); + kernel.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch()); @@ -182,13 +183,13 @@ struct System::Impl { void Shutdown() { // Log last frame performance stats - auto perf_results = GetAndResetPerfStats(); - Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", - perf_results.emulation_speed * 100.0); - Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", - perf_results.game_fps); - Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", - perf_results.frametime * 1000.0); + const auto perf_results = GetAndResetPerfStats(); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", + perf_results.emulation_speed * 100.0); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", + perf_results.game_fps); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", + perf_results.frametime * 1000.0); is_powered_on = false; @@ -265,7 +266,7 @@ struct System::Impl { Core::FrameLimiter frame_limiter; }; -System::System() : impl{std::make_unique<Impl>()} {} +System::System() : impl{std::make_unique<Impl>(*this)} {} System::~System() = default; Cpu& System::CurrentCpuCore() { diff --git a/src/core/core.h b/src/core/core.h index d720013f7..ba76a41d8 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() { return System::GetInstance().CurrentArmInterface(); } -inline TelemetrySession& Telemetry() { - return System::GetInstance().TelemetrySession(); -} - inline Kernel::Process* CurrentProcess() { return System::GetInstance().CurrentProcess(); } diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index 9dd493efb..e29afd630 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); } -std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { +std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const { new_x = std::max(new_x, framebuffer_layout.screen.left); new_x = std::min(new_x, framebuffer_layout.screen.right - 1); diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 7006a37b3..d0bcb4660 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -166,7 +166,7 @@ private: /** * Clip the provided coordinates to be inside the touchscreen area. */ - std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); + std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const; }; } // namespace Core::Frontend diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index f8662d193..a1357179f 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -12,12 +12,12 @@ namespace Layout { // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio template <class T> -static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, - float screen_aspect_ratio) { +static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area, + float screen_aspect_ratio) { float scale = std::min(static_cast<float>(window_area.GetWidth()), window_area.GetHeight() / screen_aspect_ratio); - return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), - static_cast<T>(std::round(scale * screen_aspect_ratio))}; + return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), + static_cast<T>(std::round(scale * screen_aspect_ratio))}; } FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { @@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width}; - MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; - MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); + Common::Rectangle<unsigned> screen_window_area{0, 0, width, height}; + Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); float window_aspect_ratio = static_cast<float>(height) / width; diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index e06647794..c2c63d08c 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -16,7 +16,7 @@ struct FramebufferLayout { unsigned width{ScreenUndocked::Width}; unsigned height{ScreenUndocked::Height}; - MathUtil::Rectangle<unsigned> screen; + Common::Rectangle<unsigned> screen; /** * Returns the ration of pixel size of the screen, compared to the native size of the undocked diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h index 16fdcd376..7c11d7546 100644 --- a/src/core/frontend/input.h +++ b/src/core/frontend/input.h @@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>; * Orientation is determined by right-hand rule. * Units: deg/sec */ -using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; +using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>; /** * A touch device is an input device that returns a tuple of two floats and a bool. The floats are diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h index ed84197b3..455d1f346 100644 --- a/src/core/hle/ipc.h +++ b/src/core/hle/ipc.h @@ -4,10 +4,10 @@ #pragma once +#include "common/bit_field.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/swap.h" -#include "core/hle/kernel/errors.h" -#include "core/memory.h" namespace IPC { diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index a250d088d..9780a7849 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "core/core.h" #include "core/core_cpu.h" +#include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" @@ -17,58 +18,16 @@ #include "core/hle/result.h" #include "core/memory.h" -namespace Kernel::AddressArbiter { - -// Performs actual address waiting logic. -static ResultCode WaitForAddress(VAddr address, s64 timeout) { - SharedPtr<Thread> current_thread = GetCurrentThread(); - current_thread->SetArbiterWaitAddress(address); - current_thread->SetStatus(ThreadStatus::WaitArb); - current_thread->InvalidateWakeupCallback(); - - current_thread->WakeAfterDelay(timeout); - - Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); - return RESULT_TIMEOUT; -} - -// Gets the threads waiting on an address. -static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) { - const auto RetrieveWaitingThreads = [](std::size_t core_index, - std::vector<SharedPtr<Thread>>& waiting_threads, - VAddr arb_addr) { - const auto& scheduler = Core::System::GetInstance().Scheduler(core_index); - const auto& thread_list = scheduler.GetThreadList(); - - for (const auto& thread : thread_list) { - if (thread->GetArbiterWaitAddress() == arb_addr) - waiting_threads.push_back(thread); - } - }; - - // Retrieve all threads that are waiting for this address. - std::vector<SharedPtr<Thread>> threads; - RetrieveWaitingThreads(0, threads, address); - RetrieveWaitingThreads(1, threads, address); - RetrieveWaitingThreads(2, threads, address); - RetrieveWaitingThreads(3, threads, address); - - // Sort them by priority, such that the highest priority ones come first. - std::sort(threads.begin(), threads.end(), - [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) { - return lhs->GetPriority() < rhs->GetPriority(); - }); - - return threads; -} - +namespace Kernel { +namespace { // Wake up num_to_wake (or all) threads in a vector. -static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { +void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) { // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. std::size_t last = waiting_threads.size(); - if (num_to_wake > 0) + if (num_to_wake > 0) { last = num_to_wake; + } // Signal the waiting threads. for (std::size_t i = 0; i < last; i++) { @@ -78,42 +37,41 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num waiting_threads[i]->ResumeFromWait(); } } +} // Anonymous namespace -// Signals an address being waited on. -ResultCode SignalToAddress(VAddr address, s32 num_to_wake) { - std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); +AddressArbiter::AddressArbiter(Core::System& system) : system{system} {} +AddressArbiter::~AddressArbiter() = default; +ResultCode AddressArbiter::SignalToAddress(VAddr address, s32 num_to_wake) { + const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); WakeThreads(waiting_threads, num_to_wake); return RESULT_SUCCESS; } -// Signals an address being waited on and increments its value if equal to the value argument. -ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { +ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, + s32 num_to_wake) { // Ensure that we can write to the address. if (!Memory::IsValidVirtualAddress(address)) { return ERR_INVALID_ADDRESS_STATE; } - if (static_cast<s32>(Memory::Read32(address)) == value) { - Memory::Write32(address, static_cast<u32>(value + 1)); - } else { + if (static_cast<s32>(Memory::Read32(address)) != value) { return ERR_INVALID_STATE; } + Memory::Write32(address, static_cast<u32>(value + 1)); return SignalToAddress(address, num_to_wake); } -// Signals an address being waited on and modifies its value based on waiting thread count if equal -// to the value argument. -ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, - s32 num_to_wake) { +ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, + s32 num_to_wake) { // Ensure that we can write to the address. if (!Memory::IsValidVirtualAddress(address)) { return ERR_INVALID_ADDRESS_STATE; } // Get threads waiting on the address. - std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); + const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); // Determine the modified value depending on the waiting count. s32 updated_value; @@ -125,31 +83,31 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu updated_value = value; } - if (static_cast<s32>(Memory::Read32(address)) == value) { - Memory::Write32(address, static_cast<u32>(updated_value)); - } else { + if (static_cast<s32>(Memory::Read32(address)) != value) { return ERR_INVALID_STATE; } + Memory::Write32(address, static_cast<u32>(updated_value)); WakeThreads(waiting_threads, num_to_wake); return RESULT_SUCCESS; } -// Waits on an address if the value passed is less than the argument value, optionally decrementing. -ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) { +ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, + bool should_decrement) { // Ensure that we can read the address. if (!Memory::IsValidVirtualAddress(address)) { return ERR_INVALID_ADDRESS_STATE; } - s32 cur_value = static_cast<s32>(Memory::Read32(address)); - if (cur_value < value) { - if (should_decrement) { - Memory::Write32(address, static_cast<u32>(cur_value - 1)); - } - } else { + const s32 cur_value = static_cast<s32>(Memory::Read32(address)); + if (cur_value >= value) { return ERR_INVALID_STATE; } + + if (should_decrement) { + Memory::Write32(address, static_cast<u32>(cur_value - 1)); + } + // Short-circuit without rescheduling, if timeout is zero. if (timeout == 0) { return RESULT_TIMEOUT; @@ -158,8 +116,7 @@ ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool return WaitForAddress(address, timeout); } -// Waits on an address if the value passed is equal to the argument value. -ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { +ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { // Ensure that we can read the address. if (!Memory::IsValidVirtualAddress(address)) { return ERR_INVALID_ADDRESS_STATE; @@ -175,4 +132,46 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { return WaitForAddress(address, timeout); } -} // namespace Kernel::AddressArbiter + +ResultCode AddressArbiter::WaitForAddress(VAddr address, s64 timeout) { + SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread(); + current_thread->SetArbiterWaitAddress(address); + current_thread->SetStatus(ThreadStatus::WaitArb); + current_thread->InvalidateWakeupCallback(); + + current_thread->WakeAfterDelay(timeout); + + system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); + return RESULT_TIMEOUT; +} + +std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const { + const auto RetrieveWaitingThreads = [this](std::size_t core_index, + std::vector<SharedPtr<Thread>>& waiting_threads, + VAddr arb_addr) { + const auto& scheduler = system.Scheduler(core_index); + const auto& thread_list = scheduler.GetThreadList(); + + for (const auto& thread : thread_list) { + if (thread->GetArbiterWaitAddress() == arb_addr) { + waiting_threads.push_back(thread); + } + } + }; + + // Retrieve all threads that are waiting for this address. + std::vector<SharedPtr<Thread>> threads; + RetrieveWaitingThreads(0, threads, address); + RetrieveWaitingThreads(1, threads, address); + RetrieveWaitingThreads(2, threads, address); + RetrieveWaitingThreads(3, threads, address); + + // Sort them by priority, such that the highest priority ones come first. + std::sort(threads.begin(), threads.end(), + [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) { + return lhs->GetPriority() < rhs->GetPriority(); + }); + + return threads; +} +} // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index b58f21bec..e0c36f2e3 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -5,28 +5,68 @@ #pragma once #include "common/common_types.h" +#include "core/hle/kernel/address_arbiter.h" union ResultCode; -namespace Kernel::AddressArbiter { +namespace Core { +class System; +} -enum class ArbitrationType { - WaitIfLessThan = 0, - DecrementAndWaitIfLessThan = 1, - WaitIfEqual = 2, -}; +namespace Kernel { -enum class SignalType { - Signal = 0, - IncrementAndSignalIfEqual = 1, - ModifyByWaitingCountAndSignalIfEqual = 2, -}; +class Thread; + +class AddressArbiter { +public: + enum class ArbitrationType { + WaitIfLessThan = 0, + DecrementAndWaitIfLessThan = 1, + WaitIfEqual = 2, + }; + + enum class SignalType { + Signal = 0, + IncrementAndSignalIfEqual = 1, + ModifyByWaitingCountAndSignalIfEqual = 2, + }; + + explicit AddressArbiter(Core::System& system); + ~AddressArbiter(); + + AddressArbiter(const AddressArbiter&) = delete; + AddressArbiter& operator=(const AddressArbiter&) = delete; + + AddressArbiter(AddressArbiter&&) = default; + AddressArbiter& operator=(AddressArbiter&&) = delete; -ResultCode SignalToAddress(VAddr address, s32 num_to_wake); -ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); -ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); + /// Signals an address being waited on. + ResultCode SignalToAddress(VAddr address, s32 num_to_wake); -ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement); -ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); + /// Signals an address being waited on and increments its value if equal to the value argument. + ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake); + + /// Signals an address being waited on and modifies its value based on waiting thread count if + /// equal to the value argument. + ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, + s32 num_to_wake); + + /// Waits on an address if the value passed is less than the argument value, + /// optionally decrementing. + ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, + bool should_decrement); + + /// Waits on an address if the value passed is equal to the argument value. + ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); + +private: + // Waits on the given address with a timeout in nanoseconds + ResultCode WaitForAddress(VAddr address, s64 timeout); + + // Gets the threads waiting on an address. + std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; + + Core::System& system; +}; -} // namespace Kernel::AddressArbiter +} // namespace Kernel diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index d17eb0cb6..8097b3863 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7}; constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; +constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104}; constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp index c8acde5b1..bdfaa977f 100644 --- a/src/core/hle/kernel/handle_table.cpp +++ b/src/core/hle/kernel/handle_table.cpp @@ -14,32 +14,47 @@ namespace Kernel { namespace { constexpr u16 GetSlot(Handle handle) { - return handle >> 15; + return static_cast<u16>(handle >> 15); } constexpr u16 GetGeneration(Handle handle) { - return handle & 0x7FFF; + return static_cast<u16>(handle & 0x7FFF); } } // Anonymous namespace HandleTable::HandleTable() { - next_generation = 1; Clear(); } HandleTable::~HandleTable() = default; +ResultCode HandleTable::SetSize(s32 handle_table_size) { + if (static_cast<u32>(handle_table_size) > MAX_COUNT) { + return ERR_OUT_OF_MEMORY; + } + + // Values less than or equal to zero indicate to use the maximum allowable + // size for the handle table in the actual kernel, so we ignore the given + // value in that case, since we assume this by default unless this function + // is called. + if (handle_table_size > 0) { + table_size = static_cast<u16>(handle_table_size); + } + + return RESULT_SUCCESS; +} + ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { DEBUG_ASSERT(obj != nullptr); - u16 slot = next_free_slot; - if (slot >= generations.size()) { + const u16 slot = next_free_slot; + if (slot >= table_size) { LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); return ERR_HANDLE_TABLE_FULL; } next_free_slot = generations[slot]; - u16 generation = next_generation++; + const u16 generation = next_generation++; // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. // Horizon OS uses zero to represent an invalid handle, so skip to 1. @@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) { } ResultCode HandleTable::Close(Handle handle) { - if (!IsValid(handle)) + if (!IsValid(handle)) { return ERR_INVALID_HANDLE; + } - u16 slot = GetSlot(handle); + const u16 slot = GetSlot(handle); objects[slot] = nullptr; @@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) { } bool HandleTable::IsValid(Handle handle) const { - std::size_t slot = GetSlot(handle); - u16 generation = GetGeneration(handle); + const std::size_t slot = GetSlot(handle); + const u16 generation = GetGeneration(handle); - return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; + return slot < table_size && objects[slot] != nullptr && generations[slot] == generation; } SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { @@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { } void HandleTable::Clear() { - for (u16 i = 0; i < MAX_COUNT; ++i) { + for (u16 i = 0; i < table_size; ++i) { generations[i] = i + 1; objects[i] = nullptr; } diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h index 89a3bc740..44901391b 100644 --- a/src/core/hle/kernel/handle_table.h +++ b/src/core/hle/kernel/handle_table.h @@ -50,6 +50,20 @@ public: ~HandleTable(); /** + * Sets the number of handles that may be in use at one time + * for this handle table. + * + * @param handle_table_size The desired size to limit the handle table to. + * + * @returns an error code indicating if initialization was successful. + * If initialization was not successful, then ERR_OUT_OF_MEMORY + * will be returned. + * + * @pre handle_table_size must be within the range [0, 1024] + */ + ResultCode SetSize(s32 handle_table_size); + + /** * Allocates a handle for the given object. * @return The created Handle or one of the following errors: * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. @@ -104,13 +118,20 @@ private: std::array<u16, MAX_COUNT> generations; /** + * The limited size of the handle table. This can be specified by process + * capabilities in order to restrict the overall number of handles that + * can be created in a process instance + */ + u16 table_size = static_cast<u16>(MAX_COUNT); + + /** * Global counter of the number of created handles. Stored in `generations` when a handle is * created, and wraps around to 1 when it hits 0x8000. */ - u16 next_generation; + u16 next_generation = 1; /// Head of the free slots linked list. - u16 next_free_slot; + u16 next_free_slot = 0; }; } // namespace Kernel diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index cb1c5aff3..0107acea4 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h @@ -15,6 +15,8 @@ #include "core/hle/ipc.h" #include "core/hle/kernel/object.h" +union ResultCode; + namespace Service { class ServiceFrameworkBase; } @@ -208,14 +210,12 @@ public: template <typename T> SharedPtr<T> GetCopyObject(std::size_t index) { - ASSERT(index < copy_objects.size()); - return DynamicObjectCast<T>(copy_objects[index]); + return DynamicObjectCast<T>(copy_objects.at(index)); } template <typename T> SharedPtr<T> GetMoveObject(std::size_t index) { - ASSERT(index < move_objects.size()); - return DynamicObjectCast<T>(move_objects[index]); + return DynamicObjectCast<T>(move_objects.at(index)); } void AddMoveObject(SharedPtr<Object> object) { @@ -232,7 +232,7 @@ public: template <typename T> std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { - return std::static_pointer_cast<T>(domain_request_handlers[index]); + return std::static_pointer_cast<T>(domain_request_handlers.at(index)); } void SetDomainRequestHandlers( diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index dd749eed4..04ea9349e 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/core_timing.h" +#include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" @@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_ } struct KernelCore::Impl { - void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) { + explicit Impl(Core::System& system) : address_arbiter{system}, system{system} {} + + void Initialize(KernelCore& kernel) { Shutdown(); InitializeSystemResourceLimit(kernel); - InitializeThreads(core_timing); + InitializeThreads(); } void Shutdown() { @@ -122,9 +125,9 @@ struct KernelCore::Impl { ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess()); } - void InitializeThreads(Core::Timing::CoreTiming& core_timing) { + void InitializeThreads() { thread_wakeup_event_type = - core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); + system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); } std::atomic<u32> next_object_id{0}; @@ -135,6 +138,8 @@ struct KernelCore::Impl { std::vector<SharedPtr<Process>> process_list; Process* current_process = nullptr; + Kernel::AddressArbiter address_arbiter; + SharedPtr<ResourceLimit> system_resource_limit; Core::Timing::EventType* thread_wakeup_event_type = nullptr; @@ -145,15 +150,18 @@ struct KernelCore::Impl { /// Map of named ports managed by the kernel, which can be retrieved using /// the ConnectToPort SVC. NamedPortTable named_ports; + + // System context + Core::System& system; }; -KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} +KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {} KernelCore::~KernelCore() { Shutdown(); } -void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) { - impl->Initialize(*this, core_timing); +void KernelCore::Initialize() { + impl->Initialize(*this); } void KernelCore::Shutdown() { @@ -184,6 +192,14 @@ const Process* KernelCore::CurrentProcess() const { return impl->current_process; } +AddressArbiter& KernelCore::AddressArbiter() { + return impl->address_arbiter; +} + +const AddressArbiter& KernelCore::AddressArbiter() const { + return impl->address_arbiter; +} + void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 154bced42..4d292aca9 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,6 +11,10 @@ template <typename T> class ResultVal; +namespace Core { +class System; +} + namespace Core::Timing { class CoreTiming; struct EventType; @@ -18,6 +22,7 @@ struct EventType; namespace Kernel { +class AddressArbiter; class ClientPort; class HandleTable; class Process; @@ -30,7 +35,14 @@ private: using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; public: - KernelCore(); + /// Constructs an instance of the kernel using the given System + /// instance as a context for any necessary system-related state, + /// such as threads, CPU core state, etc. + /// + /// @post After execution of the constructor, the provided System + /// object *must* outlive the kernel instance itself. + /// + explicit KernelCore(Core::System& system); ~KernelCore(); KernelCore(const KernelCore&) = delete; @@ -40,11 +52,7 @@ public: KernelCore& operator=(KernelCore&&) = delete; /// Resets the kernel to a clean slate for use. - /// - /// @param core_timing CoreTiming instance used to create any necessary - /// kernel-specific callback events. - /// - void Initialize(Core::Timing::CoreTiming& core_timing); + void Initialize(); /// Clears all resources in use by the kernel instance. void Shutdown(); @@ -67,6 +75,12 @@ public: /// Retrieves a const pointer to the current process. const Process* CurrentProcess() const; + /// Provides a reference to the kernel's address arbiter. + Kernel::AddressArbiter& AddressArbiter(); + + /// Provides a const reference to the kernel's address arbiter. + const Kernel::AddressArbiter& AddressArbiter() const; + /// Adds a port to the named port table void AddNamedPort(std::string name, SharedPtr<ClientPort> port); diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index c5aa19afa..8009150e0 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { vm_manager.Reset(metadata.GetAddressSpaceType()); const auto& caps = metadata.GetKernelCapabilities(); - return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); + const auto capability_init_result = + capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); + if (capability_init_result.IsError()) { + return capability_init_result; + } + + return handle_table.SetSize(capabilities.GetHandleTableSize()); } void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp index 3a2164b25..583e35b79 100644 --- a/src/core/hle/kernel/process_capability.cpp +++ b/src/core/hle/kernel/process_capability.cpp @@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() { interrupt_capabilities.set(); // Allow using the maximum possible amount of handles - handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); + handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT); // Allow all debugging capabilities. is_debuggable = true; @@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) { return ERR_RESERVED_VALUE; } - handle_table_size = (flags >> 16) & 0x3FF; + handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF); return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h index fbc8812a3..5cdd80747 100644 --- a/src/core/hle/kernel/process_capability.h +++ b/src/core/hle/kernel/process_capability.h @@ -156,7 +156,7 @@ public: } /// Gets the number of total allowable handles for the process' handle table. - u32 GetHandleTableSize() const { + s32 GetHandleTableSize() const { return handle_table_size; } @@ -252,7 +252,7 @@ private: u64 core_mask = 0; u64 priority_mask = 0; - u32 handle_table_size = 0; + s32 handle_table_size = 0; u32 kernel_version = 0; ProgramType program_type = ProgramType::SysModule; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index c5d399bab..7f5c0cc86 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -20,6 +20,7 @@ #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" +#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/mutex.h" @@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) { return address + size > address; } -// Checks if a given address range lies within a larger address range. -constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin, - VAddr address_range_end) { - const VAddr end_address = address + size - 1; - return address_range_begin <= address && end_address <= address_range_end - 1; -} - -bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) { - return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(), - vm.GetAddressSpaceEndAddress()); -} - -bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) { - return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(), - vm.GetNewMapRegionEndAddress()); -} - // 8 GiB constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; @@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add return ERR_INVALID_ADDRESS_STATE; } - if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { + if (!vm_manager.IsWithinAddressSpace(src_addr, size)) { LOG_ERROR(Kernel_SVC, "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", src_addr, size); return ERR_INVALID_ADDRESS_STATE; } - if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { + if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) { LOG_ERROR(Kernel_SVC, "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", dst_addr, size); @@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) { auto* const current_process = Core::CurrentProcess(); auto& vm_manager = current_process->VMManager(); - if (!IsInsideAddressSpace(vm_manager, addr, size)) { + if (!vm_manager.IsWithinAddressSpace(addr, size)) { LOG_ERROR(Kernel_SVC, "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, size); @@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr } auto& vm_manager = Core::CurrentProcess()->VMManager(); - if (!IsInsideAddressSpace(vm_manager, address, size)) { + if (!vm_manager.IsWithinAddressSpace(address, size)) { LOG_ERROR(Kernel_SVC, "Given address (0x{:016X}) is outside the bounds of the address space.", address); return ERR_INVALID_ADDRESS_STATE; @@ -1495,13 +1479,14 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout return ERR_INVALID_ADDRESS; } + auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter(); switch (static_cast<AddressArbiter::ArbitrationType>(type)) { case AddressArbiter::ArbitrationType::WaitIfLessThan: - return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); + return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, false); case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan: - return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true); + return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, true); case AddressArbiter::ArbitrationType::WaitIfEqual: - return AddressArbiter::WaitForAddressIfEqual(address, value, timeout); + return address_arbiter.WaitForAddressIfEqual(address, value, timeout); default: LOG_ERROR(Kernel_SVC, "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan " @@ -1526,13 +1511,14 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to return ERR_INVALID_ADDRESS; } + auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter(); switch (static_cast<AddressArbiter::SignalType>(type)) { case AddressArbiter::SignalType::Signal: - return AddressArbiter::SignalToAddress(address, num_to_wake); + return address_arbiter.SignalToAddress(address, num_to_wake); case AddressArbiter::SignalType::IncrementAndSignalIfEqual: - return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake); + return address_arbiter.IncrementAndSignalToAddressIfEqual(address, value, num_to_wake); case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual: - return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, + return address_arbiter.ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake); default: LOG_ERROR(Kernel_SVC, diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 6661e2130..eb54d6651 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -184,8 +184,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name return ERR_INVALID_PROCESSOR_ID; } - // TODO(yuriks): Other checks, returning 0xD9001BEA - if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); // TODO (bunnei): Find the correct error code to use here diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 10ad94aa6..05c59af34 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -17,8 +17,8 @@ #include "core/memory_setup.h" namespace Kernel { - -static const char* GetMemoryStateName(MemoryState state) { +namespace { +const char* GetMemoryStateName(MemoryState state) { static constexpr const char* names[] = { "Unmapped", "Io", "Normal", "CodeStatic", @@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) { return names[ToSvcMemoryState(state)]; } +// Checks if a given address range lies within a larger address range. +constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin, + VAddr address_range_end) { + const VAddr end_address = address + size - 1; + return address_range_begin <= address && end_address <= address_range_end - 1; +} +} // Anonymous namespace + bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { ASSERT(base + size == next.base); if (permissions != next.permissions || state != next.state || attribute != next.attribute || @@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p } ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { - if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || - target + size < target) { + if (!IsWithinHeapRegion(target, size)) { return ERR_INVALID_ADDRESS; } @@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p } ResultCode VMManager::HeapFree(VAddr target, u64 size) { - if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || - target + size < target) { + if (!IsWithinHeapRegion(target, size)) { return ERR_INVALID_ADDRESS; } @@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const { return address_space_width; } +bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const { + return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(), + GetAddressSpaceEndAddress()); +} + VAddr VMManager::GetASLRRegionBaseAddress() const { return aslr_region_base; } @@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const { return code_region_end - code_region_base; } +bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const { + return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(), + GetCodeRegionEndAddress()); +} + VAddr VMManager::GetHeapRegionBaseAddress() const { return heap_region_base; } @@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const { return heap_region_end - heap_region_base; } +bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const { + return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(), + GetHeapRegionEndAddress()); +} + VAddr VMManager::GetMapRegionBaseAddress() const { return map_region_base; } @@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const { return map_region_end - map_region_base; } +bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const { + return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); +} + VAddr VMManager::GetNewMapRegionBaseAddress() const { return new_map_region_base; } @@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const { return new_map_region_end - new_map_region_base; } +bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const { + return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(), + GetNewMapRegionEndAddress()); +} + VAddr VMManager::GetTLSIORegionBaseAddress() const { return tls_io_region_base; } @@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const { return tls_io_region_end - tls_io_region_base; } +bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const { + return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(), + GetTLSIORegionEndAddress()); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 6091533bc..88e0b3c02 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -432,18 +432,21 @@ public: /// Gets the address space width in bits. u64 GetAddressSpaceWidth() const; + /// Determines whether or not the given address range lies within the address space. + bool IsWithinAddressSpace(VAddr address, u64 size) const; + /// Gets the base address of the ASLR region. VAddr GetASLRRegionBaseAddress() const; /// Gets the end address of the ASLR region. VAddr GetASLRRegionEndAddress() const; - /// Determines whether or not the specified address range is within the ASLR region. - bool IsWithinASLRRegion(VAddr address, u64 size) const; - /// Gets the size of the ASLR region u64 GetASLRRegionSize() const; + /// Determines whether or not the specified address range is within the ASLR region. + bool IsWithinASLRRegion(VAddr address, u64 size) const; + /// Gets the base address of the code region. VAddr GetCodeRegionBaseAddress() const; @@ -453,6 +456,9 @@ public: /// Gets the total size of the code region in bytes. u64 GetCodeRegionSize() const; + /// Determines whether or not the specified range is within the code region. + bool IsWithinCodeRegion(VAddr address, u64 size) const; + /// Gets the base address of the heap region. VAddr GetHeapRegionBaseAddress() const; @@ -462,6 +468,9 @@ public: /// Gets the total size of the heap region in bytes. u64 GetHeapRegionSize() const; + /// Determines whether or not the specified range is within the heap region. + bool IsWithinHeapRegion(VAddr address, u64 size) const; + /// Gets the base address of the map region. VAddr GetMapRegionBaseAddress() const; @@ -471,6 +480,9 @@ public: /// Gets the total size of the map region in bytes. u64 GetMapRegionSize() const; + /// Determines whether or not the specified range is within the map region. + bool IsWithinMapRegion(VAddr address, u64 size) const; + /// Gets the base address of the new map region. VAddr GetNewMapRegionBaseAddress() const; @@ -480,6 +492,9 @@ public: /// Gets the total size of the new map region in bytes. u64 GetNewMapRegionSize() const; + /// Determines whether or not the given address range is within the new map region + bool IsWithinNewMapRegion(VAddr address, u64 size) const; + /// Gets the base address of the TLS IO region. VAddr GetTLSIORegionBaseAddress() const; @@ -489,6 +504,9 @@ public: /// Gets the total size of the TLS IO region in bytes. u64 GetTLSIORegionSize() const; + /// Determines if the given address range is within the TLS IO region. + bool IsWithinTLSIORegion(VAddr address, u64 size) const; + /// Each VMManager has its own page table, which is set as the main one when the owning process /// is scheduled. Memory::PageTable page_table; diff --git a/src/core/hle/result.h b/src/core/hle/result.h index bfb77cc31..1ed144481 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h @@ -8,7 +8,6 @@ #include <utility> #include "common/assert.h" #include "common/bit_field.h" -#include "common/common_funcs.h" #include "common/common_types.h" // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index f255f74b5..8c5bd6059 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp @@ -7,6 +7,7 @@ #include "common/string_util.h" #include "core/core.h" #include "core/frontend/applets/software_keyboard.h" +#include "core/hle/result.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/software_keyboard.h" diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h index efd5753a1..b93a30d28 100644 --- a/src/core/hle/service/am/applets/software_keyboard.h +++ b/src/core/hle/service/am/applets/software_keyboard.h @@ -9,10 +9,13 @@ #include <vector> #include "common/common_funcs.h" +#include "common/common_types.h" #include "common/swap.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/applets.h" +union ResultCode; + namespace Service::AM::Applets { enum class KeysetDisable : u32 { diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 6831c0735..bbe813490 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -18,17 +18,11 @@ #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/writable_event.h" #include "core/hle/service/audio/audout_u.h" +#include "core/hle/service/audio/errors.h" #include "core/memory.h" namespace Service::Audio { -namespace ErrCodes { -enum { - ErrorUnknown = 2, - BufferCountExceeded = 8, -}; -} - constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; constexpr int DefaultSampleRate{48000}; @@ -100,7 +94,7 @@ private: if (stream->IsPlaying()) { IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); + rb.Push(ERR_OPERATION_FAILED); return; } @@ -143,7 +137,8 @@ private: if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); + rb.Push(ERR_BUFFER_COUNT_EXCEEDED); + return; } IPC::ResponseBuilder rb{ctx, 2}; diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 7e0cc64a8..c9de10a24 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -17,6 +17,7 @@ #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/writable_event.h" #include "core/hle/service/audio/audren_u.h" +#include "core/hle/service/audio/errors.h" namespace Service::Audio { @@ -37,7 +38,7 @@ public: {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"}, {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"}, {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"}, - {11, nullptr, "ExecuteAudioRendererRendering"}, + {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"}, }; // clang-format on RegisterHandlers(functions); @@ -138,6 +139,17 @@ private: rb.Push(rendering_time_limit_percent); } + void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_Audio, "called"); + + // This service command currently only reports an unsupported operation + // error code, or aborts. Given that, we just always return an error + // code in this case. + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_NOT_SUPPORTED); + } + Kernel::EventPair system_event; std::unique_ptr<AudioCore::AudioRenderer> renderer; u32 rendering_time_limit_percent = 100; @@ -235,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") { {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"}, - {3, nullptr, "OpenAudioRendererAuto"}, + {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"}, {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"}, }; // clang-format on @@ -248,12 +260,7 @@ AudRenU::~AudRenU() = default; void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "called"); - IPC::RequestParser rp{ctx}; - auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); - IPC::ResponseBuilder rb{ctx, 2, 0, 1}; - - rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params)); + OpenAudioRendererImpl(ctx); } void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { @@ -262,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "called"); u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); - buffer_sz += params.unknown_c * 1024; - buffer_sz += 0x940 * (params.unknown_c + 1); + buffer_sz += params.submix_count * 1024; + buffer_sz += 0x940 * (params.submix_count + 1); buffer_sz += 0x3F0 * params.voice_count; - buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); + buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10); buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); - buffer_sz += - Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * - (params.mix_buffer_count + 6), - 0x40); + buffer_sz += Common::AlignUp( + (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) * + (params.mix_buffer_count + 6), + 0x40); if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - u32 count = params.unknown_c + 1; + const u32 count = params.submix_count + 1; u64 node_count = Common::AlignUp(count, 0x40); - u64 node_state_buffer_sz = + const u64 node_state_buffer_sz = 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); u64 edge_matrix_buffer_sz = 0; node_count = Common::AlignUp(count * count, 0x40); @@ -289,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - buffer_sz += 0xE0 * params.unknown_2c; + buffer_sz += 0xE0 * params.num_splitter_send_channels; buffer_sz += 0x20 * params.splitter_count; - buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); + buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10); } buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + ((params.voice_count * 256) | 0x40); - if (params.unknown_1c >= 1) { + if (params.performance_frame_count >= 1) { output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 16 * params.voice_count + 16) + 0x658) * - (params.unknown_1c + 1) + + (params.performance_frame_count + 1) + 0xc0, 0x40) + output_sz; @@ -325,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { rb.PushIpcInterface<Audio::IAudioDevice>(); } +void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_Audio, "called"); + + OpenAudioRendererImpl(ctx); +} + void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); @@ -335,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c // based on the current revision } +void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IAudioRenderer>(params); +} + bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap switch (feature) { diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 3d63388fb..e55d25973 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h @@ -21,8 +21,11 @@ private: void OpenAudioRenderer(Kernel::HLERequestContext& ctx); void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); void GetAudioDeviceService(Kernel::HLERequestContext& ctx); + void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx); void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); + void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); + enum class AudioFeatures : u32 { Splitter, }; diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h new file mode 100644 index 000000000..6f8c09bcf --- /dev/null +++ b/src/core/hle/service/audio/errors.h @@ -0,0 +1,15 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/hle/result.h" + +namespace Service::Audio { + +constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2}; +constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8}; +constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513}; + +} // namespace Service::Audio diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 6d897c842..7cc58db4c 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -15,7 +15,7 @@ namespace Kernel { class SharedMemory; } -namespace SM { +namespace Service::SM { class ServiceManager; } diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 21ccfe1f8..dbe7ee6e8 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect) { + const Common::Rectangle<int>& crop_rect) { VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index a45086e45..ace71169f 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -25,7 +25,7 @@ public: /// Performs a screen flip, drawing the buffer pointed to by the handle. void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect); + const Common::Rectangle<int>& crop_rect); private: std::shared_ptr<nvmap> nvmap_dev; diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index fc07d9bb8..4d150fc71 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { } void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect) { + const Common::Rectangle<int>& crop_rect) { auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { return buffer.slot == slot; }); ASSERT(itr != queue.end()); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index ab90d591e..e1ccb6171 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -67,14 +67,14 @@ public: Status status = Status::Free; IGBPBuffer igbp_buffer; BufferTransformFlags transform; - MathUtil::Rectangle<int> crop_rect; + Common::Rectangle<int> crop_rect; }; void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); std::optional<u32> DequeueBuffer(u32 width, u32 height); const IGBPBuffer& RequestBuffer(u32 slot) const; void QueueBuffer(u32 slot, BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect); + const Common::Rectangle<int>& crop_rect); std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); void ReleaseBuffer(u32 slot); u32 Query(QueryType type); diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 74384a24d..a975767bb 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -420,7 +420,7 @@ public: u32_le fence_is_valid; std::array<Fence, 2> fences; - MathUtil::Rectangle<int> GetCropRect() const { + Common::Rectangle<int> GetCropRect() const { return {crop_left, crop_top, crop_right, crop_bottom}; } }; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index e9166dbd9..ec279cef8 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa FlushMode::FlushAndInvalidate); VAddr end = base + size; - while (base != end) { - ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); + ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", + base + page_table.pointers.size()); - page_table.attributes[base] = type; - page_table.pointers[base] = memory; + std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); - base += 1; - if (memory != nullptr) + if (memory == nullptr) { + std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); + } else { + while (base != end) { + page_table.pointers[base] = memory; + + base += 1; memory += PAGE_SIZE; + } } } @@ -166,9 +171,6 @@ T Read(const VAddr vaddr) { return value; } - // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state - std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); - PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (type) { case PageType::Unmapped: @@ -199,9 +201,6 @@ void Write(const VAddr vaddr, const T data) { return; } - // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state - std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); - PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (type) { case PageType::Unmapped: diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp index 9570c060e..6d96d4019 100644 --- a/src/input_common/motion_emu.cpp +++ b/src/input_common/motion_emu.cpp @@ -32,12 +32,12 @@ public: } void BeginTilt(int x, int y) { - mouse_origin = Math::MakeVec(x, y); + mouse_origin = Common::MakeVec(x, y); is_tilting = true; } void Tilt(int x, int y) { - auto mouse_move = Math::MakeVec(x, y) - mouse_origin; + auto mouse_move = Common::MakeVec(x, y) - mouse_origin; if (is_tilting) { std::lock_guard<std::mutex> guard(tilt_mutex); if (mouse_move.x == 0 && mouse_move.y == 0) { @@ -45,7 +45,7 @@ public: } else { tilt_direction = mouse_move.Cast<float>(); tilt_angle = - std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); + std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f); } } } @@ -56,7 +56,7 @@ public: is_tilting = false; } - std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { + std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() { std::lock_guard<std::mutex> guard(status_mutex); return status; } @@ -66,17 +66,17 @@ private: const std::chrono::steady_clock::duration update_duration; const float sensitivity; - Math::Vec2<int> mouse_origin; + Common::Vec2<int> mouse_origin; std::mutex tilt_mutex; - Math::Vec2<float> tilt_direction; + Common::Vec2<float> tilt_direction; float tilt_angle = 0; bool is_tilting = false; Common::Event shutdown_event; - std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; + std::tuple<Common::Vec3<float>, Common::Vec3<float>> status; std::mutex status_mutex; // Note: always keep the thread declaration at the end so that other objects are initialized @@ -85,8 +85,8 @@ private: void MotionEmuThread() { auto update_time = std::chrono::steady_clock::now(); - Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); - Math::Quaternion<float> old_q; + Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0); + Common::Quaternion<float> old_q; while (!shutdown_event.WaitUntil(update_time)) { update_time += update_duration; @@ -96,18 +96,18 @@ private: std::lock_guard<std::mutex> guard(tilt_mutex); // Find the quaternion describing current 3DS tilting - q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), - tilt_angle); + q = Common::MakeQuaternion( + Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle); } auto inv_q = q.Inverse(); // Set the gravity vector in world space - auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); + auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f); // Find the angular rate vector in world space auto angular_rate = ((q - old_q) * inv_q).xyz * 2; - angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; + angular_rate *= 1000 / update_millisecond / Common::PI * 180; // Transform the two vectors from world space to 3DS space gravity = QuaternionRotate(inv_q, gravity); @@ -131,7 +131,7 @@ public: device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); } - std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { + std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override { return device->GetStatus(); } diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 9b8a44fa1..ea27ef90d 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -13,11 +13,11 @@ namespace ArmTests { TestEnvironment::TestEnvironment(bool mutable_memory_) - : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { - + : mutable_memory(mutable_memory_), + test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} { auto process = Kernel::Process::Create(kernel, ""); kernel.MakeCurrentProcess(process.get()); - page_table = &Core::CurrentProcess()->VMManager().page_table; + page_table = &process->VMManager().page_table; std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); page_table->special_regions.clear(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b5a327936..c1ae83f4d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -106,6 +106,8 @@ add_library(video_core STATIC if (ENABLE_VULKAN) target_sources(video_core PRIVATE renderer_vulkan/declarations.h + renderer_vulkan/vk_buffer_cache.cpp + renderer_vulkan/vk_buffer_cache.h renderer_vulkan/vk_device.cpp renderer_vulkan/vk_device.h renderer_vulkan/vk_memory_manager.cpp @@ -113,7 +115,9 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_resource_manager.cpp renderer_vulkan/vk_resource_manager.h renderer_vulkan/vk_scheduler.cpp - renderer_vulkan/vk_scheduler.h) + renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h) target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ec1a57226..03b7ee5d8 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -2,12 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "core/core.h" -#include "core/memory.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/math_util.h" #include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" -#include "video_core/textures/decoders.h" namespace Tegra::Engines { @@ -44,10 +43,10 @@ void Fermi2D::HandleSurfaceCopy() { const u32 src_blit_y2{ static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; - const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, - regs.blit_dst_x + regs.blit_dst_width, - regs.blit_dst_y + regs.blit_dst_height}; + const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; + const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, + regs.blit_dst_x + regs.blit_dst_width, + regs.blit_dst_y + regs.blit_dst_height}; if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { UNIMPLEMENTED(); diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index c69f74cc5..80523e320 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -5,7 +5,7 @@ #pragma once #include <array> -#include "common/assert.h" +#include <cstddef> #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 4ca856b6b..b1d950460 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -2,9 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/assert.h" #include "common/logging/log.h" -#include "core/core.h" -#include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/memory_manager.h" diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index df0a32e0f..6575afd0f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -5,8 +5,7 @@ #pragma once #include <array> -#include "common/assert.h" -#include "common/bit_field.h" +#include <cstddef> #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index f680c2ad9..9181e9d80 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <cstddef> #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2d2136067..144e7fa82 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { auto debug_context = system.GetGPUDebugContext(); + const u32 method = method_call.method; + // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. if (executing_macro != 0) { - ASSERT(method_call.method == executing_macro + 1); + ASSERT(method == executing_macro + 1); } // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. - if (method_call.method >= MacroRegistersStart) { + if (method >= MacroRegistersStart) { // We're trying to execute a macro if (executing_macro == 0) { // A macro call must begin by writing the macro method's register, not its argument. - ASSERT_MSG((method_call.method % 2) == 0, + ASSERT_MSG((method % 2) == 0, "Can't start macro execution by writing to the ARGS register"); - executing_macro = method_call.method; + executing_macro = method; } macro_params.push_back(method_call.argument); @@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { return; } - ASSERT_MSG(method_call.method < Regs::NUM_REGS, + ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } - if (regs.reg_array[method_call.method] != method_call.argument) { - regs.reg_array[method_call.method] = method_call.argument; + if (regs.reg_array[method] != method_call.argument) { + regs.reg_array[method] = method_call.argument; // Color buffers constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); - if (method_call.method >= first_rt_reg && - method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { - const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; - dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); + if (method >= first_rt_reg && + method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { + const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; + dirty_flags.color_buffer.set(rt_index); } // Zeta buffer constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); - if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || - method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || - method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || - (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && - method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { + if (method == MAXWELL3D_REG_INDEX(zeta_enable) || + method == MAXWELL3D_REG_INDEX(zeta_width) || + method == MAXWELL3D_REG_INDEX(zeta_height) || + (method >= MAXWELL3D_REG_INDEX(zeta) && + method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { dirty_flags.zeta_buffer = true; } // Shader constexpr u32 shader_registers_count = sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); - if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && - method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { + if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && + method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { dirty_flags.shaders = true; } // Vertex format - if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && - method_call.method < - MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { + if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && + method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { dirty_flags.vertex_attrib_format = true; } // Vertex buffer - if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && - method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { - dirty_flags.vertex_array |= - 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); - } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && - method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { - dirty_flags.vertex_array |= - 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); - } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && - method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { - dirty_flags.vertex_array |= - 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays)); + if (method >= MAXWELL3D_REG_INDEX(vertex_array) && + method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { + dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); + } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && + method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { + dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); + } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && + method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { + dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); } } - switch (method_call.method) { + switch (method) { case MAXWELL3D_REG_INDEX(macros.data): { ProcessMacroUpload(method_call.argument); break; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0e3873ffd..7fbf1026e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -5,8 +5,10 @@ #pragma once #include <array> +#include <bitset> #include <unordered_map> #include <vector> + #include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" @@ -503,7 +505,7 @@ public: f32 translate_z; INSERT_PADDING_WORDS(2); - MathUtil::Rectangle<s32> GetRect() const { + Common::Rectangle<s32> GetRect() const { return { GetX(), // left GetY() + GetHeight(), // top @@ -1094,19 +1096,18 @@ public: MemoryManager& memory_manager; struct DirtyFlags { - u8 color_buffer = 0xFF; - bool zeta_buffer = true; - - bool shaders = true; + std::bitset<8> color_buffer{0xFF}; + std::bitset<32> vertex_array{0xFFFFFFFF}; bool vertex_attrib_format = true; - u32 vertex_array = 0xFFFFFFFF; + bool zeta_buffer = true; + bool shaders = true; void OnMemoryWrite() { - color_buffer = 0xFF; zeta_buffer = true; shaders = true; - vertex_array = 0xFFFFFFFF; + color_buffer.set(); + vertex_array.set(); } }; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 529a14ec7..0474c7ba3 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/logging/log.h" #include "core/core.h" #include "core/memory.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index cf75aeb12..34c369320 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <cstddef> #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 252592edd..c7eb15b6a 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -6,7 +6,6 @@ #include <bitset> #include <optional> -#include <string> #include <tuple> #include <vector> diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 0f5bfdcbf..6313702f2 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -100,7 +100,7 @@ struct FramebufferConfig { using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; TransformFlags transform_flags; - MathUtil::Rectangle<int> crop_rect; + Common::Rectangle<int> crop_rect; }; namespace Engines { diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index bcf0c15a4..a7bcf26fb 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -129,6 +129,15 @@ protected: return ++modified_ticks; } + /// Flushes the specified object, updating appropriate cache state as needed + void FlushObject(const T& object) { + if (!object->IsDirty()) { + return; + } + object->Flush(); + object->MarkAsModified(false, *this); + } + private: /// Returns a list of cached objects from the specified memory region, ordered by access time std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { @@ -154,15 +163,6 @@ private: return objects; } - /// Flushes the specified object, updating appropriate cache state as needed - void FlushObject(const T& object) { - if (!object->IsDirty()) { - return; - } - object->Flush(); - object->MarkAsModified(false, *this); - } - using ObjectSet = std::set<T>; using ObjectCache = std::unordered_map<VAddr, T>; using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index b2a223705..6a1dc9cf6 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,8 +47,8 @@ public: /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) { return false; } diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 94223f45f..919d1f2d4 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/logging/log.h" #include "core/frontend/emu_window.h" #include "core/settings.h" #include "video_core/renderer_base.h" diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 12d876120..321d9dd3d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -102,8 +102,8 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { + : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window}, + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { } // Rebinding the VAO invalidates the vertex buffer bindings. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); state.draw.vertex_array = vao_entry.handle; return vao_entry.handle; @@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; - if (!gpu.dirty_flags.vertex_array) + if (gpu.dirty_flags.vertex_array.none()) return; MICROPROFILE_SCOPE(OpenGL_VB); // Upload all guest vertex arrays sequentially to our buffer for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (~gpu.dirty_flags.vertex_array & (1u << index)) + if (!gpu.dirty_flags.vertex_array[index]) continue; const auto& vertex_array = regs.vertex_array[index]; @@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { } } - gpu.dirty_flags.vertex_array = 0; + gpu.dirty_flags.vertex_array.reset(); } DrawParameters RasterizerOpenGL::SetupDraw() { @@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, std::optional<std::size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && - !gpu.dirty_flags.zeta_buffer) { + if (fb_config_state == current_framebuffer_config_state && + gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). This is done because the guest registers may not change but the // host framebuffer may contain different attachments @@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() { // Add space for at least 18 constant buffers buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); - bool invalidate = buffer_cache.Map(buffer_size); + const bool invalidate = buffer_cache.Map(buffer_size); if (invalidate) { // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); } const GLuint vao = SetupVertexFormat(); @@ -738,19 +738,11 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); - // Execute draw call + res_cache.SignalPreDrawCall(); params.DispatchDraw(); - - // Disable scissor test - state.viewports[0].scissor.enabled = false; + res_cache.SignalPostDrawCall(); accelerate_draw = AccelDraw::Disabled; - - // Unbind textures for potential future use as framebuffer attachments - for (auto& texture_unit : state.texture_units) { - texture_unit.Unbind(); - } - state.Apply(); } void RasterizerOpenGL::FlushAll() {} @@ -779,8 +771,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); return true; @@ -1034,7 +1026,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { for (std::size_t i = 0; i < viewport_count; i++) { auto& viewport = current_state.viewports[i]; const auto& src = regs.viewports[i]; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; + const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; viewport.x = viewport_rect.left; viewport.y = viewport_rect.bottom; viewport.width = viewport_rect.GetWidth(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 258d62259..2f0524f85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -62,8 +62,8 @@ public: void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) override; + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 642ccb269..876698b37 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <optional> #include <glad/glad.h> #include "common/alignment.h" @@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return format; } -MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { +Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; if (IsPixelFormatASTC(pixel_format)) { // ASTC formats must stop at the ATSC block size boundary @@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) // alternatives. This signals a bug on those functions. const auto width = static_cast<GLsizei>(params.MipWidth(0)); const auto height = static_cast<GLsizei>(params.MipHeight(0)); + memory_size = params.MemorySize(); + reinterpreted = false; const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); gl_internal_format = format_tuple.internal_format; @@ -873,30 +876,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; const auto& regs{gpu.regs}; - if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { - return last_color_buffers[index]; + if (!gpu.dirty_flags.color_buffer[index]) { + return current_color_buffers[index]; } - gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); + gpu.dirty_flags.color_buffer.reset(index); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); if (index >= regs.rt_control.count) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - return last_color_buffers[index] = GetSurface(color_params, preserve_contents); + return current_color_buffers[index] = GetSurface(color_params, preserve_contents); } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->LoadGLBuffer(); surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); surface->MarkAsModified(false, *this); + surface->MarkForReload(false); } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { @@ -908,18 +912,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres Surface surface{TryGet(params.addr)}; if (surface) { if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { - // Use the cached surface as-is + // Use the cached surface as-is unless it's not synced with memory + if (surface->MustReload()) + LoadSurface(surface); return surface; } else if (preserve_contents) { // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one Surface new_surface{RecreateSurface(surface, params)}; - Unregister(surface); + UnregisterSurface(surface); Register(new_surface); + if (new_surface->IsUploaded()) { + RegisterReinterpretSurface(new_surface); + } return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. - Unregister(surface); + UnregisterSurface(surface); } } @@ -973,8 +982,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, } static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -1104,7 +1113,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, void RasterizerCacheOpenGL::FermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) { const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); @@ -1201,4 +1210,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params return {}; } +static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, + u32 height) { + for (u32 i = 0; i < params.max_mip_level; i++) { + if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { + return {i}; + } + } + return {}; +} + +static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { + const std::size_t size = params.LayerMemorySize(); + VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); + for (u32 i = 0; i < params.depth; i++) { + if (start == addr) { + return {i}; + } + start += size; + } + return {}; +} + +static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + const std::size_t src_memory_size = src_params.size_in_bytes; + const std::optional<u32> level = + TryFindBestMipMap(src_memory_size, dst_params, src_params.height); + if (level.has_value()) { + if (src_params.width == dst_params.MipWidthGobAligned(*level) && + src_params.height == dst_params.MipHeight(*level) && + src_params.block_height >= dst_params.MipBlockHeight(*level)) { + const std::optional<u32> slot = + TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); + if (slot.has_value()) { + glCopyImageSubData(render_surface->Texture().handle, + SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, + blitted_surface->Texture().handle, + SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, + dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); + blitted_surface->MarkAsModified(true, cache); + return true; + } + } + } + return false; +} + +static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { + const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); + const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); + if (bound2 > bound1) + return true; + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.component_type != src_params.component_type); +} + +static bool IsReinterpretInvalidSecond(const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.height > src_params.height && dst_params.width > src_params.width); +} + +bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, + Surface intersect) { + if (IsReinterpretInvalid(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { + if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + FlushObject(intersect); + FlushObject(triggering_surface); + intersect->MarkForReload(true); + } + return true; +} + +void RasterizerCacheOpenGL::SignalPreDrawCall() { + if (texception && GLAD_GL_ARB_texture_barrier) { + glTextureBarrier(); + } + texception = false; +} + +void RasterizerCacheOpenGL::SignalPostDrawCall() { + for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { + if (current_color_buffers[i] != nullptr) { + Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); + if (intersect != nullptr) { + PartialReinterpretSurface(current_color_buffers[i], intersect); + texception = true; + } + } + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 89d733c50..797bbdc9c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -28,12 +28,13 @@ namespace OpenGL { class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; +using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; using SurfaceTarget = VideoCore::Surface::SurfaceTarget; using SurfaceType = VideoCore::Surface::SurfaceType; using PixelFormat = VideoCore::Surface::PixelFormat; using ComponentType = VideoCore::Surface::ComponentType; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct SurfaceParams { enum class SurfaceClass { @@ -71,7 +72,7 @@ struct SurfaceParams { } /// Returns the rectangle corresponding to this surface - MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; + Common::Rectangle<u32> GetRect(u32 mip_level = 0) const; /// Returns the total size of this surface in bytes, adjusted for compression std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { @@ -140,10 +141,18 @@ struct SurfaceParams { return offset; } + std::size_t GetMipmapSingleSize(u32 mip_level) const { + return InnerMipmapMemorySize(mip_level, false, is_layered); + } + u32 MipWidth(u32 mip_level) const { return std::max(1U, width >> mip_level); } + u32 MipWidthGobAligned(u32 mip_level) const { + return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); + } + u32 MipHeight(u32 mip_level) const { return std::max(1U, height >> mip_level); } @@ -346,6 +355,10 @@ public: return cached_size_in_bytes; } + std::size_t GetMemorySize() const { + return memory_size; + } + void Flush() override { FlushGLBuffer(); } @@ -395,6 +408,26 @@ public: Tegra::Texture::SwizzleSource swizzle_z, Tegra::Texture::SwizzleSource swizzle_w); + void MarkReinterpreted() { + reinterpreted = true; + } + + bool IsReinterpreted() const { + return reinterpreted; + } + + void MarkForReload(bool reload) { + must_reload = reload; + } + + bool MustReload() const { + return must_reload; + } + + bool IsUploaded() const { + return params.identity == SurfaceParams::SurfaceClass::Uploaded; + } + private: void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); @@ -408,6 +441,9 @@ private: GLenum gl_internal_format{}; std::size_t cached_size_in_bytes{}; std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; + std::size_t memory_size; + bool reinterpreted = false; + bool must_reload = false; }; class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { @@ -430,8 +466,11 @@ public: /// Copies the contents of one surface to another void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect); + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect); + + void SignalPreDrawCall(); + void SignalPostDrawCall(); private: void LoadSurface(const Surface& surface); @@ -449,6 +488,10 @@ private: /// Tries to get a reserved surface for the specified parameters Surface TryGetReservedSurface(const SurfaceParams& params); + // Partialy reinterpret a surface based on a triggering_surface that collides with it. + // returns true if the reinterpret was successful, false in case it was not. + bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); + /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); @@ -465,12 +508,50 @@ private: OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; + bool texception = false; + /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one /// using the new format. OGLBuffer copy_pbo; - std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; Surface last_depth_buffer; + + using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; + using SurfaceInterval = typename SurfaceIntervalCache::interval_type; + + static auto GetReinterpretInterval(const Surface& object) { + return SurfaceInterval::right_open(object->GetAddr() + 1, + object->GetAddr() + object->GetMemorySize() - 1); + } + + // Reinterpreted surfaces are very fragil as the game may keep rendering into them. + SurfaceIntervalCache reinterpreted_surfaces; + + void RegisterReinterpretSurface(Surface reinterpret_surface) { + auto interval = GetReinterpretInterval(reinterpret_surface); + reinterpreted_surfaces.insert({interval, reinterpret_surface}); + reinterpret_surface->MarkReinterpreted(); + } + + Surface CollideOnReinterpretedSurface(VAddr addr) const { + const SurfaceInterval interval{addr}; + for (auto& pair : + boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { + return pair.second; + } + return nullptr; + } + + /// Unregisters an object from the cache + void UnregisterSurface(const Surface& object) { + if (object->IsReinterpreted()) { + auto interval = GetReinterpretInterval(object); + reinterpreted_surfaces.erase(interval); + } + Unregister(object); + } }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 81882822b..82fc4d44b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <cstring> #include <fmt/format.h> #include <lz4.h> diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 219f08053..9419326a3 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const { if (has_delta) { glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - textures.data()); + textures.data() + first); } } @@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const { } if (has_delta) { glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - samplers.data()); + samplers.data() + first); } } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 272fc2e8e..8b510b6ae 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() { LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); } +void RendererOpenGL::AddTelemetryFields() { + const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; + const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; + const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; + + LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); + LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); + LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); + + auto& telemetry_session = system.TelemetrySession(); + telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); + telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); + telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); +} + void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; @@ -257,6 +272,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer) { texture.width = framebuffer.width; texture.height = framebuffer.height; + texture.pixel_format = framebuffer.pixel_format; GLint internal_format; switch (framebuffer.pixel_format) { @@ -465,17 +481,7 @@ bool RendererOpenGL::Init() { glDebugMessageCallback(DebugHandler, nullptr); } - const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; - const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; - const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; - - LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); - LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); - LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); - - Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); - Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); - Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); + AddTelemetryFields(); if (!GLAD_GL_VERSION_4_3) { return false; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 7e13e566b..6cbf9d2cb 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -39,7 +39,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture; - const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; + const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; @@ -60,6 +60,7 @@ public: private: void InitOpenGLObjects(); + void AddTelemetryFields(); void CreateRasterizer(); void ConfigureFramebufferTexture(TextureInfo& texture, @@ -102,7 +103,7 @@ private: /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; - MathUtil::Rectangle<int> framebuffer_crop_rect; + Common::Rectangle<int> framebuffer_crop_rect; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp new file mode 100644 index 000000000..4a33a6c84 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -0,0 +1,116 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <memory> +#include <optional> +#include <tuple> + +#include "common/alignment.h" +#include "common/assert.h" +#include "core/memory.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) + : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} { + const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer; + const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | + vk::AccessFlagBits::eUniformRead; + stream_buffer = + std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access, + vk::PipelineStageFlagBits::eAllCommands); + buffer_handle = stream_buffer->GetBuffer(); +} + +VKBufferCache::~VKBufferCache() = default; + +u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, + bool cache) { + const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; + ASSERT(cpu_addr); + + // Cache management is a big overhead, so only cache entries with a given size. + // TODO: Figure out which size is the best for given games. + cache &= size >= 2048; + + if (cache) { + if (auto entry = TryGet(*cpu_addr); entry) { + if (entry->size >= size && entry->alignment == alignment) { + return entry->offset; + } + Unregister(entry); + } + } + + AlignBuffer(alignment); + const u64 uploaded_offset = buffer_offset; + + Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + + buffer_ptr += size; + buffer_offset += size; + + if (cache) { + auto entry = std::make_shared<CachedBufferEntry>(); + entry->offset = uploaded_offset; + entry->size = size; + entry->alignment = alignment; + entry->addr = *cpu_addr; + Register(entry); + } + + return uploaded_offset; +} + +u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { + AlignBuffer(alignment); + std::memcpy(buffer_ptr, raw_pointer, size); + const u64 uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return uploaded_offset; +} + +std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { + AlignBuffer(alignment); + u8* const uploaded_ptr = buffer_ptr; + const u64 uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return {uploaded_ptr, uploaded_offset}; +} + +void VKBufferCache::Reserve(std::size_t max_size) { + bool invalidate; + std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); + buffer_offset = buffer_offset_base; + + if (invalidate) { + InvalidateAll(); + } +} + +VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { + return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); +} + +void VKBufferCache::AlignBuffer(std::size_t alignment) { + // Align the offset, not the mapped pointer + const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h new file mode 100644 index 000000000..d8e916f31 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -0,0 +1,87 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <tuple> + +#include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKMemoryManager; +class VKStreamBuffer; + +struct CachedBufferEntry final : public RasterizerCacheObject { + VAddr GetAddr() const override { + return addr; + } + + std::size_t GetSizeInBytes() const override { + return size; + } + + // We do not have to flush this cache as things in it are never modified by us. + void Flush() override {} + + VAddr addr; + std::size_t size; + u64 offset; + std::size_t alignment; +}; + +class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { +public: + explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); + ~VKBufferCache(); + + /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been + /// allocated. + u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, + bool cache = true); + + /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. + u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); + + /// Reserves memory to be used by host's CPU. Returns mapped address and offset. + std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4); + + /// Reserves a region of memory to be used in subsequent upload/reserve operations. + void Reserve(std::size_t max_size); + + /// Ensures that the set data is sent to the device. + [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); + + /// Returns the buffer cache handle. + vk::Buffer GetBuffer() const { + return buffer_handle; + } + +private: + void AlignBuffer(std::size_t alignment); + + Tegra::MemoryManager& tegra_memory_manager; + + std::unique_ptr<VKStreamBuffer> stream_buffer; + vk::Buffer buffer_handle; + + u8* buffer_ptr = nullptr; + u64 buffer_offset = 0; + u64 buffer_offset_base = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 17ee93b91..0451babbf 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, u64 begin, u64 end) - : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {} + : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index 1678463c7..a1e117443 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) { protected_resources.push_back(resource); } -void VKFence::Unprotect(const VKResource* resource) { +void VKFence::Unprotect(VKResource* resource) { const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); - if (it != protected_resources.end()) { - protected_resources.erase(it); - } + ASSERT(it != protected_resources.end()); + + resource->OnFenceRemoval(this); + protected_resources.erase(it); } VKFenceWatch::VKFenceWatch() = default; @@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() { } void VKFenceWatch::Wait() { - if (!fence) { + if (fence == nullptr) { return; } fence->Wait(); fence->Unprotect(this); - fence = nullptr; } void VKFenceWatch::Watch(VKFence& new_fence) { diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h index 5018dfa44..5bfe4cead 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h @@ -63,7 +63,7 @@ public: void Protect(VKResource* resource); /// Removes protection for a resource. - void Unprotect(const VKResource* resource); + void Unprotect(VKResource* resource); /// Retreives the fence. operator vk::Fence() const { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 000000000..58ffa42f2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -0,0 +1,90 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <optional> +#include <vector> + +#include "common/assert.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; +constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; + +VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, + vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) + : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ + pipeline_stage} { + CreateBuffers(memory_manager, usage); + ReserveWatches(WATCHES_INITIAL_RESERVE); +} + +VKStreamBuffer::~VKStreamBuffer() = default; + +std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { + ASSERT(size <= buffer_size); + mapped_size = size; + + if (offset + size > buffer_size) { + // The buffer would overflow, save the amount of used buffers, signal an invalidation and + // reset the state. + invalidation_mark = used_watches; + used_watches = 0; + offset = 0; + } + + return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; +} + +VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { + ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); + + if (invalidation_mark) { + // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. + exctx = scheduler.Flush(); + std::for_each(watches.begin(), watches.begin() + *invalidation_mark, + [&](auto& resource) { resource->Wait(); }); + invalidation_mark = std::nullopt; + } + + if (used_watches + 1 >= watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(WATCHES_RESERVE_CHUNK); + } + // Add a watch for this allocation. + watches[used_watches++]->Watch(exctx.GetFence()); + + offset += size; + + return exctx; +} + +void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { + const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, + nullptr); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); + commit = memory_manager.Commit(*buffer, true); + mapped_pointer = commit->GetData(); +} + +void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { + const std::size_t previous_size = watches.size(); + watches.resize(previous_size + grow_size); + std::generate(watches.begin() + previous_size, watches.end(), + []() { return std::make_unique<VKFenceWatch>(); }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h new file mode 100644 index 000000000..69d036ccd --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -0,0 +1,72 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> +#include <tuple> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKFenceWatch; +class VKResourceManager; +class VKScheduler; + +class VKStreamBuffer { +public: + explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, + vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); + ~VKStreamBuffer(); + + /** + * Reserves a region of memory from the stream buffer. + * @param size Size to reserve. + * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer + * offset and a boolean that's true when buffer has been invalidated. + */ + std::tuple<u8*, u64, bool> Reserve(u64 size); + + /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. + [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); + + vk::Buffer GetBuffer() const { + return *buffer; + } + +private: + /// Creates Vulkan buffer handles committing the required the required memory. + void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); + + /// Increases the amount of watches available. + void ReserveWatches(std::size_t grow_size); + + const VKDevice& device; ///< Vulkan device manager. + VKScheduler& scheduler; ///< Command scheduler. + const u64 buffer_size; ///< Total size of the stream buffer. + const vk::AccessFlags access; ///< Access usage of this stream buffer. + const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. + + UniqueBuffer buffer; ///< Mapped buffer. + VKMemoryCommit commit; ///< Memory commit. + u8* mapped_pointer{}; ///< Pointer to the host visible commit + + u64 offset{}; ///< Buffer iterator. + u64 mapped_size{}; ///< Size reserved for the current copy. + + std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches + std::size_t used_watches{}; ///< Count of watches, reset on invalidation. + std::optional<std::size_t> + invalidation_mark{}; ///< Number of watches used in the current invalidation. +}; + +} // namespace Vulkan diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 044ba116a..a7ac26d71 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { - // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the - // gamma. case Tegra::RenderTargetFormat::RGBA8_SRGB: return PixelFormat::RGBA8_SRGB; case Tegra::RenderTargetFormat::RGBA8_UNORM: diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h index 39db32dbb..821b345d7 100644 --- a/src/web_service/verify_login.h +++ b/src/web_service/verify_login.h @@ -4,8 +4,6 @@ #pragma once -#include <functional> -#include <future> #include <string> namespace WebService { diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index b7737b615..40da1a4e2 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -10,7 +10,6 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/web_result.h" -#include "core/settings.h" #include "web_service/web_backend.h" namespace WebService { diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index 5f0896f84..c8b0a5ec0 100644 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp @@ -53,15 +53,15 @@ void CompatDB::Submit() { case CompatDBPage::Final: back(); LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); - Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", - compatibility->checkedId()); + Core::System::GetInstance().TelemetrySession().AddField( + Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId()); button(NextButton)->setEnabled(false); button(NextButton)->setText(tr("Submitting")); button(QWizard::CancelButton)->setVisible(false); testcase_watcher.setFuture(QtConcurrent::run( - [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); + [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); break; default: LOG_ERROR(Frontend, "Unexpected page: {}", currentId()); diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp index 209798521..71683da8e 100644 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ b/src/yuzu/debugger/graphics/graphics_surface.cpp @@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() { for (unsigned int y = 0; y < surface_height; ++y) { for (unsigned int x = 0; x < surface_width; ++x) { - Math::Vec4<u8> color; + Common::Vec4<u8> color; color[0] = texture_data[x + y * surface_width + 0]; color[1] = texture_data[x + y * surface_width + 1]; color[2] = texture_data[x + y * surface_width + 2]; diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index f50225d5f..06ad74ffe 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -81,9 +81,8 @@ QString WaitTreeText::GetText() const { return text; } -WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); - +WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table) + : mutex_address(mutex_address) { mutex_value = Memory::Read32(mutex_address); owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); owner = handle_table.Get<Kernel::Thread>(owner_handle); @@ -316,7 +315,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); if (mutex_wait_address != 0) { - list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); + const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable(); + list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table)); } else { list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); } diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h index 365c3dbfe..62886609d 100644 --- a/src/yuzu/debugger/wait_tree.h +++ b/src/yuzu/debugger/wait_tree.h @@ -17,6 +17,7 @@ class EmuThread; namespace Kernel { +class HandleTable; class ReadableEvent; class WaitObject; class Thread; @@ -72,7 +73,7 @@ public: class WaitTreeMutexInfo : public WaitTreeExpandableItem { Q_OBJECT public: - explicit WaitTreeMutexInfo(VAddr mutex_address); + explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table); ~WaitTreeMutexInfo() override; QString GetText() const override; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 1d460c189..0f5a14841 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -561,7 +561,10 @@ void GMainWindow::InitializeHotkeys() { Settings::values.use_frame_limit = !Settings::values.use_frame_limit; UpdateStatusBar(); }); - constexpr u16 SPEED_LIMIT_STEP = 5; + // TODO: Remove this comment/static whenever the next major release of + // MSVC occurs and we make it a requirement (see: + // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html) + static constexpr u16 SPEED_LIMIT_STEP = 5; connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), &QShortcut::activated, this, [&] { if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { @@ -846,7 +849,7 @@ bool GMainWindow::LoadROM(const QString& filename) { } game_path = filename; - Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); + system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); return true; } diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index c34b5467f..c6c66a787 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -216,7 +216,7 @@ int main(int argc, char** argv) { } } - Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); + system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); system.Renderer().Rasterizer().LoadDiskResources(); |